diff options
Diffstat (limited to 'libvpx/vp9')
47 files changed, 3557 insertions, 1892 deletions
diff --git a/libvpx/vp9/common/vp9_alloccommon.c b/libvpx/vp9/common/vp9_alloccommon.c index 7345e259b..5702dca71 100644 --- a/libvpx/vp9/common/vp9_alloccommon.c +++ b/libvpx/vp9/common/vp9_alloccommon.c @@ -17,17 +17,26 @@ #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_onyxc_int.h" -void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) { +void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width, + int height) { const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); + *mi_cols = aligned_width >> MI_SIZE_LOG2; + *mi_rows = aligned_height >> MI_SIZE_LOG2; + *mi_stride = calc_mi_size(*mi_cols); +} - cm->mi_cols = aligned_width >> MI_SIZE_LOG2; - cm->mi_rows = aligned_height >> MI_SIZE_LOG2; - cm->mi_stride = calc_mi_size(cm->mi_cols); +void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows, + int mi_cols) { + *mb_cols = (mi_cols + 1) >> 1; + *mb_rows = (mi_rows + 1) >> 1; + *mb_num = (*mb_rows) * (*mb_cols); +} - cm->mb_cols = (cm->mi_cols + 1) >> 1; - cm->mb_rows = (cm->mi_rows + 1) >> 1; - cm->MBs = cm->mb_rows * cm->mb_cols; +void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) { + vp9_set_mi_size(&cm->mi_rows, &cm->mi_cols, &cm->mi_stride, width, height); + vp9_set_mb_size(&cm->mb_rows, &cm->mb_cols, &cm->MBs, cm->mi_rows, + cm->mi_cols); } static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { diff --git a/libvpx/vp9/common/vp9_alloccommon.h b/libvpx/vp9/common/vp9_alloccommon.h index 8900038ea..90cbb093d 100644 --- a/libvpx/vp9/common/vp9_alloccommon.h +++ b/libvpx/vp9/common/vp9_alloccommon.h @@ -33,6 +33,11 @@ void vp9_free_postproc_buffers(struct VP9Common *cm); int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); void vp9_free_state_buffers(struct VP9Common *cm); +void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width, + int height); +void vp9_set_mb_size(int *mb_rows, 
int *mb_cols, int *mb_num, int mi_rows, + int mi_cols); + void vp9_set_mb_mi(struct VP9Common *cm, int width, int height); void vp9_swap_current_and_last_seg_map(struct VP9Common *cm); diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h index 2ddc0f121..6ef8127a5 100644 --- a/libvpx/vp9/common/vp9_blockd.h +++ b/libvpx/vp9/common/vp9_blockd.h @@ -60,6 +60,7 @@ typedef struct { #define GOLDEN_FRAME 2 #define ALTREF_FRAME 3 #define MAX_REF_FRAMES 4 +#define MAX_INTER_REF_FRAMES 3 typedef int8_t MV_REFERENCE_FRAME; diff --git a/libvpx/vp9/common/vp9_mv.h b/libvpx/vp9/common/vp9_mv.h index 14dde7dd0..76f93cf0b 100644 --- a/libvpx/vp9/common/vp9_mv.h +++ b/libvpx/vp9/common/vp9_mv.h @@ -19,6 +19,8 @@ extern "C" { #endif +#define INVALID_MV 0x80008000 + typedef struct mv { int16_t row; int16_t col; diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h index 662b8ef5e..f3942a8f0 100644 --- a/libvpx/vp9/common/vp9_onyxc_int.h +++ b/libvpx/vp9/common/vp9_onyxc_int.h @@ -244,14 +244,6 @@ typedef struct VP9Common { int byte_alignment; int skip_loop_filter; - // Private data associated with the frame buffer callbacks. - void *cb_priv; - vpx_get_frame_buffer_cb_fn_t get_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_fb_cb; - - // Handles memory for the codec. - InternalFrameBufferList int_frame_buffers; - // External BufferPool passed from outside. 
BufferPool *buffer_pool; @@ -262,6 +254,34 @@ typedef struct VP9Common { int lf_row; } VP9_COMMON; +typedef struct { + int frame_width; + int frame_height; + int render_frame_width; + int render_frame_height; + int mi_rows; + int mi_cols; + int mb_rows; + int mb_cols; + int num_mbs; + vpx_bit_depth_t bit_depth; +} FRAME_INFO; + +static INLINE void init_frame_info(FRAME_INFO *frame_info, + const VP9_COMMON *cm) { + frame_info->frame_width = cm->width; + frame_info->frame_height = cm->height; + frame_info->render_frame_width = cm->render_width; + frame_info->render_frame_height = cm->render_height; + frame_info->mi_cols = cm->mi_cols; + frame_info->mi_rows = cm->mi_rows; + frame_info->mb_cols = cm->mb_cols; + frame_info->mb_rows = cm->mb_rows; + frame_info->num_mbs = cm->MBs; + frame_info->bit_depth = cm->bit_depth; + // TODO(angiebird): Figure out how to get subsampling_x/y here +} + static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) { if (index < 0 || index >= FRAME_BUFFERS) return NULL; if (cm->error.error_code != VPX_CODEC_OK) return NULL; diff --git a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c index 5373b0218..d2c8535b0 100644 --- a/libvpx/vp9/common/vp9_postproc.c +++ b/libvpx/vp9/common/vp9_postproc.c @@ -183,7 +183,8 @@ void vp9_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch, int rows, int cols, } #endif // CONFIG_VP9_HIGHBITDEPTH -static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, +static void deblock_and_de_macro_block(VP9_COMMON *cm, + YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag, uint8_t *limits) { @@ -216,7 +217,7 @@ static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, source->uv_height, source->uv_width, ppl); } else { #endif // CONFIG_VP9_HIGHBITDEPTH - vp9_deblock(source, post, q, limits); + vp9_deblock(cm, source, post, q, limits); vpx_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, post->y_width, 
q2mbl(q)); vpx_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, @@ -226,8 +227,8 @@ static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, #endif // CONFIG_VP9_HIGHBITDEPTH } -void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, - uint8_t *limits) { +void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) { const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q + 0.0065 + 0.5); #if CONFIG_VP9_HIGHBITDEPTH @@ -252,9 +253,8 @@ void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, } else { #endif // CONFIG_VP9_HIGHBITDEPTH int mbr; - const int mb_rows = src->y_height / 16; - const int mb_cols = src->y_width / 16; - + const int mb_rows = cm->mb_rows; + const int mb_cols = cm->mb_cols; memset(limits, (unsigned char)ppl, 16 * mb_cols); for (mbr = 0; mbr < mb_rows; mbr++) { @@ -276,9 +276,9 @@ void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, #endif // CONFIG_VP9_HIGHBITDEPTH } -void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, - uint8_t *limits) { - vp9_deblock(src, dst, q, limits); +void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) { + vp9_deblock(cm, src, dst, q, limits); } static void swap_mi_and_prev_mi(VP9_COMMON *cm) { @@ -383,21 +383,21 @@ int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, vpx_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int); } if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) { - deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf, + deblock_and_de_macro_block(cm, &cm->post_proc_buffer_int, ppbuf, q + (ppflags->deblocking_level - 5) * 10, 1, 0, cm->postproc_state.limits); } else if (flags & VP9D_DEBLOCK) { - vp9_deblock(&cm->post_proc_buffer_int, ppbuf, q, + vp9_deblock(cm, &cm->post_proc_buffer_int, 
ppbuf, q, cm->postproc_state.limits); } else { vpx_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf); } } else if (flags & VP9D_DEMACROBLOCK) { - deblock_and_de_macro_block(cm->frame_to_show, ppbuf, + deblock_and_de_macro_block(cm, cm->frame_to_show, ppbuf, q + (ppflags->deblocking_level - 5) * 10, 1, 0, cm->postproc_state.limits); } else if (flags & VP9D_DEBLOCK) { - vp9_deblock(cm->frame_to_show, ppbuf, q, cm->postproc_state.limits); + vp9_deblock(cm, cm->frame_to_show, ppbuf, q, cm->postproc_state.limits); } else { vpx_yv12_copy_frame(cm->frame_to_show, ppbuf); } diff --git a/libvpx/vp9/common/vp9_postproc.h b/libvpx/vp9/common/vp9_postproc.h index 67efc1b4e..bbe3aed83 100644 --- a/libvpx/vp9/common/vp9_postproc.h +++ b/libvpx/vp9/common/vp9_postproc.h @@ -40,11 +40,11 @@ struct VP9Common; int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags, int unscaled_width); -void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, - uint8_t *limits); +void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits); -void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, - uint8_t *limits); +void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits); #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c index 04f41e6a3..ff59ff504 100644 --- a/libvpx/vp9/common/vp9_reconinter.c +++ b/libvpx/vp9/common/vp9_reconinter.c @@ -96,8 +96,8 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, int bw, const int spel_right = spel_left - SUBPEL_SHIFTS; const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS; const int spel_bottom = spel_top - SUBPEL_SHIFTS; - MV clamped_mv = { src_mv->row * (1 << (1 - ss_y)), - src_mv->col * (1 << (1 - ss_x)) }; + MV clamped_mv = { (short)(src_mv->row * (1 
<< (1 - ss_y))), + (short)(src_mv->col * (1 << (1 - ss_x))) }; assert(ss_x <= 1); assert(ss_y <= 1); diff --git a/libvpx/vp9/common/vp9_thread_common.c b/libvpx/vp9/common/vp9_thread_common.c index c79d9b7f0..b3d50162b 100644 --- a/libvpx/vp9/common/vp9_thread_common.c +++ b/libvpx/vp9/common/vp9_thread_common.c @@ -298,7 +298,10 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, pthread_cond_init(&lf_sync->cond[i], NULL); } } - pthread_mutex_init(&lf_sync->lf_mutex, NULL); + + CHECK_MEM_ERROR(cm, lf_sync->lf_mutex, + vpx_malloc(sizeof(*lf_sync->lf_mutex))); + pthread_mutex_init(lf_sync->lf_mutex, NULL); CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex, vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows)); @@ -339,47 +342,50 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, // Deallocate lf synchronization related mutex and data void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { - if (lf_sync != NULL) { + assert(lf_sync != NULL); + #if CONFIG_MULTITHREAD + if (lf_sync->mutex != NULL) { int i; - - if (lf_sync->mutex != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_mutex_destroy(&lf_sync->mutex[i]); - } - vpx_free(lf_sync->mutex); + for (i = 0; i < lf_sync->rows; ++i) { + pthread_mutex_destroy(&lf_sync->mutex[i]); } - if (lf_sync->cond != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_cond_destroy(&lf_sync->cond[i]); - } - vpx_free(lf_sync->cond); + vpx_free(lf_sync->mutex); + } + if (lf_sync->cond != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_cond_destroy(&lf_sync->cond[i]); } - if (lf_sync->recon_done_mutex != NULL) { - int i; - for (i = 0; i < lf_sync->rows; ++i) { - pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]); - } - vpx_free(lf_sync->recon_done_mutex); + vpx_free(lf_sync->cond); + } + if (lf_sync->recon_done_mutex != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]); } + 
vpx_free(lf_sync->recon_done_mutex); + } - pthread_mutex_destroy(&lf_sync->lf_mutex); - if (lf_sync->recon_done_cond != NULL) { - int i; - for (i = 0; i < lf_sync->rows; ++i) { - pthread_cond_destroy(&lf_sync->recon_done_cond[i]); - } - vpx_free(lf_sync->recon_done_cond); + if (lf_sync->lf_mutex != NULL) { + pthread_mutex_destroy(lf_sync->lf_mutex); + vpx_free(lf_sync->lf_mutex); + } + if (lf_sync->recon_done_cond != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_cond_destroy(&lf_sync->recon_done_cond[i]); } + vpx_free(lf_sync->recon_done_cond); + } #endif // CONFIG_MULTITHREAD - vpx_free(lf_sync->lfdata); - vpx_free(lf_sync->cur_sb_col); - vpx_free(lf_sync->num_tiles_done); - // clear the structure as the source of this call may be a resize in which - // case this call will be followed by an _alloc() which may fail. - vp9_zero(*lf_sync); - } + vpx_free(lf_sync->lfdata); + vpx_free(lf_sync->cur_sb_col); + vpx_free(lf_sync->num_tiles_done); + // clear the structure as the source of this call may be a resize in which + // case this call will be followed by an _alloc() which may fail. 
+ vp9_zero(*lf_sync); } static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { @@ -390,7 +396,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { #if CONFIG_MULTITHREAD const int tile_cols = 1 << cm->log2_tile_cols; - pthread_mutex_lock(&lf_sync->lf_mutex); + pthread_mutex_lock(lf_sync->lf_mutex); if (cm->lf_row < max_rows) { cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; return_val = cm->lf_row; @@ -401,7 +407,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { cur_row += 1; } } - pthread_mutex_unlock(&lf_sync->lf_mutex); + pthread_mutex_unlock(lf_sync->lf_mutex); if (return_val == -1) return return_val; @@ -411,7 +417,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { &lf_sync->recon_done_mutex[cur_row]); } pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]); - pthread_mutex_lock(&lf_sync->lf_mutex); + pthread_mutex_lock(lf_sync->lf_mutex); if (lf_sync->corrupted) { int row = return_val >> MI_BLOCK_SIZE_LOG2; pthread_mutex_lock(&lf_sync->mutex[row]); @@ -420,7 +426,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { pthread_mutex_unlock(&lf_sync->mutex[row]); return_val = -1; } - pthread_mutex_unlock(&lf_sync->lf_mutex); + pthread_mutex_unlock(lf_sync->lf_mutex); #else (void)lf_sync; if (cm->lf_row < max_rows) { @@ -455,9 +461,9 @@ void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) { void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row, int corrupted) { #if CONFIG_MULTITHREAD - pthread_mutex_lock(&lf_sync->lf_mutex); + pthread_mutex_lock(lf_sync->lf_mutex); lf_sync->corrupted |= corrupted; - pthread_mutex_unlock(&lf_sync->lf_mutex); + pthread_mutex_unlock(lf_sync->lf_mutex); pthread_mutex_lock(&lf_sync->recon_done_mutex[row]); lf_sync->num_tiles_done[row] += 1; if (num_tiles == lf_sync->num_tiles_done[row]) { diff --git a/libvpx/vp9/common/vp9_thread_common.h b/libvpx/vp9/common/vp9_thread_common.h index 94c9de659..5df0117f1 100644 --- 
a/libvpx/vp9/common/vp9_thread_common.h +++ b/libvpx/vp9/common/vp9_thread_common.h @@ -40,7 +40,7 @@ typedef struct VP9LfSyncData { int num_active_workers; // number of scheduled workers. #if CONFIG_MULTITHREAD - pthread_mutex_t lf_mutex; + pthread_mutex_t *lf_mutex; pthread_mutex_t *recon_done_mutex; pthread_cond_t *recon_done_cond; #endif diff --git a/libvpx/vp9/decoder/vp9_decodeframe.c b/libvpx/vp9/decoder/vp9_decodeframe.c index 7d66cb2b2..e8b386994 100644 --- a/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/libvpx/vp9/decoder/vp9_decodeframe.c @@ -529,16 +529,15 @@ static void high_build_mc_border(const uint8_t *src8, int src_stride, #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH -static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, - int x0, int y0, int b_w, int b_h, - int frame_width, int frame_height, +static void extend_and_predict(TileWorkerData *twd, const uint8_t *buf_ptr1, + int pre_buf_stride, int x0, int y0, int b_w, + int b_h, int frame_width, int frame_height, int border_offset, uint8_t *const dst, int dst_buf_stride, int subpel_x, int subpel_y, const InterpKernel *kernel, const struct scale_factors *sf, MACROBLOCKD *xd, int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]); - + uint16_t *mc_buf_high = twd->extend_and_predict_buf; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, x0, y0, b_w, b_h, frame_width, frame_height); @@ -554,15 +553,15 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, } } #else -static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, - int x0, int y0, int b_w, int b_h, - int frame_width, int frame_height, +static void extend_and_predict(TileWorkerData *twd, const uint8_t *buf_ptr1, + int pre_buf_stride, int x0, int y0, int b_w, + int b_h, int frame_width, int frame_height, int border_offset, uint8_t *const dst, int 
dst_buf_stride, int subpel_x, int subpel_y, const InterpKernel *kernel, const struct scale_factors *sf, int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); + uint8_t *mc_buf = (uint8_t *)twd->extend_and_predict_buf; const uint8_t *buf_ptr; build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, x0, y0, b_w, b_h, @@ -575,8 +574,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, #endif // CONFIG_VP9_HIGHBITDEPTH static void dec_build_inter_predictors( - MACROBLOCKD *xd, int plane, int bw, int bh, int x, int y, int w, int h, - int mi_x, int mi_y, const InterpKernel *kernel, + TileWorkerData *twd, MACROBLOCKD *xd, int plane, int bw, int bh, int x, + int y, int w, int h, int mi_x, int mi_y, const InterpKernel *kernel, const struct scale_factors *sf, struct buf_2d *pre_buf, struct buf_2d *dst_buf, const MV *mv, RefCntBuffer *ref_frame_buf, int is_scaled, int ref) { @@ -687,9 +686,9 @@ static void dec_build_inter_predictors( const int b_h = y1 - y0 + 1; const int border_offset = y_pad * 3 * b_w + x_pad * 3; - extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, frame_width, - frame_height, border_offset, dst, dst_buf->stride, - subpel_x, subpel_y, kernel, sf, + extend_and_predict(twd, buf_ptr1, buf_stride, x0, y0, b_w, b_h, + frame_width, frame_height, border_offset, dst, + dst_buf->stride, subpel_x, subpel_y, kernel, sf, #if CONFIG_VP9_HIGHBITDEPTH xd, #endif @@ -712,7 +711,8 @@ static void dec_build_inter_predictors( #endif // CONFIG_VP9_HIGHBITDEPTH } -static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, +static void dec_build_inter_predictors_sb(TileWorkerData *twd, + VP9Decoder *const pbi, MACROBLOCKD *xd, int mi_row, int mi_col) { int plane; @@ -755,10 +755,10 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, for (y = 0; y < num_4x4_h; ++y) { for (x = 0; x < num_4x4_w; ++x) { const MV mv = average_split_mvs(pd, mi, ref, i++); - 
dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 4 * x, 4 * y, - 4, 4, mi_x, mi_y, kernel, sf, pre_buf, - dst_buf, &mv, ref_frame_buf, is_scaled, - ref); + dec_build_inter_predictors(twd, xd, plane, n4w_x4, n4h_x4, 4 * x, + 4 * y, 4, 4, mi_x, mi_y, kernel, sf, + pre_buf, dst_buf, &mv, ref_frame_buf, + is_scaled, ref); } } } @@ -772,7 +772,7 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, const int n4w_x4 = 4 * num_4x4_w; const int n4h_x4 = 4 * num_4x4_h; struct buf_2d *const pre_buf = &pd->pre[ref]; - dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4, + dec_build_inter_predictors(twd, xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel, sf, pre_buf, dst_buf, &mv, ref_frame_buf, is_scaled, ref); } @@ -964,7 +964,7 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, } } else { // Prediction - dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); + dec_build_inter_predictors_sb(twd, pbi, xd, mi_row, mi_col); #if CONFIG_MISMATCH_DEBUG { int plane; @@ -1048,7 +1048,7 @@ static void recon_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, predict_and_reconstruct_intra_block_row_mt); } else { // Prediction - dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); + dec_build_inter_predictors_sb(twd, pbi, xd, mi_row, mi_col); // Reconstruction if (!mi->skip) { @@ -1733,9 +1733,9 @@ static int lpf_map_write_check(VP9LfSync *lf_sync, int row, int num_tile_cols) { int return_val = 0; #if CONFIG_MULTITHREAD int corrupted; - pthread_mutex_lock(&lf_sync->lf_mutex); + pthread_mutex_lock(lf_sync->lf_mutex); corrupted = lf_sync->corrupted; - pthread_mutex_unlock(&lf_sync->lf_mutex); + pthread_mutex_unlock(lf_sync->lf_mutex); if (!corrupted) { pthread_mutex_lock(&lf_sync->recon_done_mutex[row]); lf_sync->num_tiles_done[row] += 1; @@ -1905,6 +1905,7 @@ static int row_decode_worker_hook(void *arg1, void *arg2) { LFWorkerData *lf_data = thread_data->lf_data; VP9LfSync *lf_sync = 
thread_data->lf_sync; volatile int corrupted = 0; + TileWorkerData *volatile tile_data_recon = NULL; while (!vp9_jobq_dequeue(&row_mt_worker_data->jobq, &job, sizeof(job), 1)) { int mi_col; @@ -1921,9 +1922,10 @@ static int row_decode_worker_hook(void *arg1, void *arg2) { } else if (job.job_type == RECON_JOB) { const int cur_sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; const int is_last_row = sb_rows - 1 == cur_sb_row; - TileWorkerData twd_recon; - TileWorkerData *const tile_data_recon = &twd_recon; int mi_col_start, mi_col_end; + if (!tile_data_recon) + CHECK_MEM_ERROR(cm, tile_data_recon, + vpx_memalign(32, sizeof(TileWorkerData))); tile_data_recon->xd = pbi->mb; vp9_tile_init(&tile_data_recon->xd.tile, cm, 0, job.tile_col); @@ -2006,6 +2008,7 @@ static int row_decode_worker_hook(void *arg1, void *arg2) { } } + vpx_free(tile_data_recon); return !corrupted; } diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c index 943fe478a..49c675394 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.c +++ b/libvpx/vp9/decoder/vp9_decodemv.c @@ -444,17 +444,6 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, } } -static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv, - int refmv_count) { - int i; - - // Make sure all the candidates are properly clamped etc - for (i = 0; i < refmv_count; ++i) { - lower_mv_precision(&mvlist[i].as_mv, allow_hp); - *best_mv = mvlist[i]; - } -} - // This macro is used to add a motion vector mv_ref list if it isn't // already in the list. If it's the second motion vector or early_break // it will also skip all additional processing and jump to Done! 
@@ -494,7 +483,7 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, const POSITION *const mv_ref_search, int_mv *mv_ref_list, int mi_row, int mi_col, - int block, int is_sub8x8) { + int block) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; int different_ref_found = 0; @@ -511,7 +500,7 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); i = 0; - if (is_sub8x8) { + if (block >= 0) { // If the size < 8x8 we get the mv from the bmi substructure for the // nearest two blocks. for (i = 0; i < 2; ++i) { @@ -628,19 +617,22 @@ static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, assert(MAX_MV_REF_CANDIDATES == 2); - refmv_count = - dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, - mv_list, mi_row, mi_col, block, 1); - switch (block) { - case 0: best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; break; + case 0: + refmv_count = + dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, + mv_list, mi_row, mi_col, block); + best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; + break; case 1: case 2: if (b_mode == NEARESTMV) { best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; } else { + dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, + mv_list, mi_row, mi_col, block); best_sub8x8->as_int = 0; - for (n = 0; n < refmv_count; ++n) + for (n = 0; n < 2; ++n) if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) { best_sub8x8->as_int = mv_list[n].as_int; break; @@ -651,15 +643,20 @@ static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, if (b_mode == NEARESTMV) { best_sub8x8->as_int = bmi[2].as_mv[ref].as_int; } else { - int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; - candidates[0] = bmi[1].as_mv[ref]; - candidates[1] = bmi[0].as_mv[ref]; - candidates[2] = mv_list[0]; - candidates[3] = mv_list[1]; 
best_sub8x8->as_int = 0; - for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) - if (bmi[2].as_mv[ref].as_int != candidates[n].as_int) { - best_sub8x8->as_int = candidates[n].as_int; + if (bmi[2].as_mv[ref].as_int != bmi[1].as_mv[ref].as_int) { + best_sub8x8->as_int = bmi[1].as_mv[ref].as_int; + break; + } + if (bmi[2].as_mv[ref].as_int != bmi[0].as_mv[ref].as_int) { + best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; + break; + } + dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, + mv_list, mi_row, mi_col, block); + for (n = 0; n < 2; ++n) + if (bmi[2].as_mv[ref].as_int != mv_list[n].as_int) { + best_sub8x8->as_int = mv_list[n].as_int; break; } } @@ -715,26 +712,6 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, } else { if (bsize >= BLOCK_8X8) mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx); - else - // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV. - // Setting mode to NEARESTMV forces the search to stop after the nearestmv - // has been found. After b_modes have been read, mode will be overwritten - // by the last b_mode. 
- mi->mode = NEARESTMV; - - if (mi->mode != ZEROMV) { - for (ref = 0; ref < 1 + is_compound; ++ref) { - int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; - const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; - int refmv_count; - - refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, - tmp_mvs, mi_row, mi_col, -1, 0); - - dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref], - refmv_count); - } - } } mi->interp_filter = (cm->interp_filter == SWITCHABLE) @@ -746,6 +723,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, const int num_4x4_h = 1 << xd->bmode_blocks_hl; int idx, idy; PREDICTION_MODE b_mode; + int got_mv_refs_for_new = 0; int_mv best_sub8x8[2]; const uint32_t invalid_mv = 0x80008000; // Initialize the 2nd element as even though it won't be used meaningfully @@ -760,6 +738,18 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, for (ref = 0; ref < 1 + is_compound; ++ref) append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref, mi_row, mi_col, &best_sub8x8[ref]); + } else if (b_mode == NEWMV && !got_mv_refs_for_new) { + for (ref = 0; ref < 1 + is_compound; ++ref) { + int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; + const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; + + dec_find_mv_refs(cm, xd, NEWMV, frame, mv_ref_search, tmp_mvs, + mi_row, mi_col, -1); + + lower_mv_precision(&tmp_mvs[0].as_mv, allow_hp); + best_ref_mvs[ref] = tmp_mvs[0]; + got_mv_refs_for_new = 1; + } } if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs, @@ -777,6 +767,17 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, copy_mv_pair(mi->mv, mi->bmi[3].as_mv); } else { + if (mi->mode != ZEROMV) { + for (ref = 0; ref < 1 + is_compound; ++ref) { + int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; + const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; + int refmv_count = + dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, tmp_mvs, + mi_row, mi_col, -1); + lower_mv_precision(&tmp_mvs[refmv_count - 1].as_mv, allow_hp); + 
best_ref_mvs[ref] = tmp_mvs[refmv_count - 1]; + } + } xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs, best_ref_mvs, is_compound, allow_hp, r); } diff --git a/libvpx/vp9/decoder/vp9_decoder.h b/libvpx/vp9/decoder/vp9_decoder.h index 4a22aa6b5..b0ef83c73 100644 --- a/libvpx/vp9/decoder/vp9_decoder.h +++ b/libvpx/vp9/decoder/vp9_decoder.h @@ -55,6 +55,7 @@ typedef struct TileWorkerData { DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. So planes must be decoded serially */ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); + DECLARE_ALIGNED(16, uint16_t, extend_and_predict_buf[80 * 2 * 80 * 2]); struct vpx_internal_error_info error_info; } TileWorkerData; diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c index e250a5a35..c2e6b3d54 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.c +++ b/libvpx/vp9/decoder/vp9_detokenize.c @@ -243,9 +243,9 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type, #endif // CONFIG_VP9_HIGHBITDEPTH #else if (read_bool(r, 128, &value, &count, &range)) { - dqcoeff[scan[c]] = -v; + dqcoeff[scan[c]] = (tran_low_t)-v; } else { - dqcoeff[scan[c]] = v; + dqcoeff[scan[c]] = (tran_low_t)v; } #endif // CONFIG_COEFFICIENT_RANGE_CHECKING ++c; diff --git a/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c b/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c index 8b62b450c..d75a48179 100644 --- a/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c +++ b/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c @@ -26,6 +26,22 @@ #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/vpx_dsp_common.h" +static INLINE void calculate_dqcoeff_and_store(const int16x8_t qcoeff, + const int16x8_t dequant, + tran_low_t *dqcoeff) { + const int32x4_t dqcoeff_0 = + vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); + const int32x4_t dqcoeff_1 = + vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant)); + +#if CONFIG_VP9_HIGHBITDEPTH + vst1q_s32(dqcoeff, dqcoeff_0); + vst1q_s32(dqcoeff 
+ 4, dqcoeff_1); +#else + vst1q_s16(dqcoeff, vcombine_s16(vmovn_s32(dqcoeff_0), vmovn_s32(dqcoeff_1))); +#endif // CONFIG_VP9_HIGHBITDEPTH +} + void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, @@ -55,7 +71,8 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t v_iscan = vld1q_s16(&iscan[0]); const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); - const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); + const int16x8_t v_abs = vabsq_s16(v_coeff); + const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round); const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); const int32x4_t v_tmp_hi = @@ -67,10 +84,9 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); - const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); + calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr); v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); store_s16q_to_tran_low(qcoeff_ptr, v_qcoeff); - store_s16q_to_tran_low(dqcoeff_ptr, v_dqcoeff); v_round = vmovq_n_s16(round_ptr[1]); v_quant = vmovq_n_s16(quant_ptr[1]); v_dequant = vmovq_n_s16(dequant_ptr[1]); @@ -80,7 +96,8 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t v_iscan = vld1q_s16(&iscan[i]); const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr + i); const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); - const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); + const int16x8_t v_abs = vabsq_s16(v_coeff); + const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round); const int32x4_t v_tmp_lo = 
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); const int32x4_t v_tmp_hi = @@ -92,10 +109,9 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); - const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); + calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr + i); v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff); - store_s16q_to_tran_low(dqcoeff_ptr + i, v_dqcoeff); } #ifdef __aarch64__ *eob_ptr = vmaxvq_s16(v_eobmax_76543210); @@ -146,9 +162,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t dequant_mask = vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh)); - int16x8_t qcoeff = vaddq_s16(coeff_abs, round); + int16x8_t qcoeff = vqaddq_s16(coeff_abs, round); int32x4_t dqcoeff_0, dqcoeff_1; - int16x8_t dqcoeff; uint16x8_t eob_max; (void)scan; (void)count; @@ -170,13 +185,17 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, // Add 1 if negative to round towards zero because the C uses division. 
dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0)); dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1)); - - dqcoeff = vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)); +#if CONFIG_VP9_HIGHBITDEPTH + vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1)); + vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1)); +#else + store_s16q_to_tran_low(dqcoeff_ptr, vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), + vshrn_n_s32(dqcoeff_1, 1))); +#endif eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan); store_s16q_to_tran_low(qcoeff_ptr, qcoeff); - store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff); iscan += 8; coeff_ptr += 8; @@ -200,9 +219,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t dequant_mask = vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh)); - int16x8_t qcoeff = vaddq_s16(coeff_abs, round); + int16x8_t qcoeff = vqaddq_s16(coeff_abs, round); int32x4_t dqcoeff_0, dqcoeff_1; - int16x8_t dqcoeff; qcoeff = vqdmulhq_s16(qcoeff, quant); qcoeff = veorq_s16(qcoeff, coeff_sign); @@ -215,14 +233,19 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0)); dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1)); - dqcoeff = - vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)); +#if CONFIG_VP9_HIGHBITDEPTH + vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1)); + vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1)); +#else + store_s16q_to_tran_low( + dqcoeff_ptr, + vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1))); +#endif eob_max = vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan)); store_s16q_to_tran_low(qcoeff_ptr, qcoeff); - store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff); iscan += 8; coeff_ptr += 8; diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index adb12c10c..858a41654 100644 --- 
a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -187,7 +187,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, MODE_INFO *const mi, // If this block is labeled for refresh, check if we should reset the // segment_id. - if (cyclic_refresh_segment_id_boosted(mi->segment_id)) { + if (cpi->sf.use_nonrd_pick_mode && + cyclic_refresh_segment_id_boosted(mi->segment_id)) { mi->segment_id = refresh_this_block; // Reset segment_id if it will be skipped. if (skip) mi->segment_id = CR_SEGMENT_ID_BASE; diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c index d47b411fa..9eddf545e 100644 --- a/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/libvpx/vp9/encoder/vp9_encodeframe.c @@ -1214,8 +1214,8 @@ static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, if (is_key_frame) return; - // For speed >= 8, avoid the chroma check if y_sad is above threshold. - if (cpi->oxcf.speed >= 8) { + // For speed > 8, avoid the chroma check if y_sad is above threshold. 
+ if (cpi->oxcf.speed > 8) { if (y_sad > cpi->vbp_thresholds[1] && (!cpi->noise_estimate.enabled || vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium)) @@ -4248,13 +4248,21 @@ static int rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (cpi->sf.prune_ref_frame_for_rect_partitions) { uint8_t used_frames; used_frames = ref_frames_used[0] | ref_frames_used[1]; - if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames; + if (used_frames) { + pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames & 0xff; + } used_frames = ref_frames_used[2] | ref_frames_used[3]; - if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames; + if (used_frames) { + pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames & 0xff; + } used_frames = ref_frames_used[0] | ref_frames_used[2]; - if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames; + if (used_frames) { + pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames & 0xff; + } used_frames = ref_frames_used[1] | ref_frames_used[3]; - if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames; + if (used_frames) { + pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames & 0xff; + } } { diff --git a/libvpx/vp9/encoder/vp9_encoder.c b/libvpx/vp9/encoder/vp9_encoder.c index 7f82a470b..b1a81c04a 100644 --- a/libvpx/vp9/encoder/vp9_encoder.c +++ b/libvpx/vp9/encoder/vp9_encoder.c @@ -80,6 +80,7 @@ #include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_temporal_filter.h" +#include "vp9/vp9_cx_iface.h" #define AM_SEGMENT_ID_INACTIVE 7 #define AM_SEGMENT_ID_ACTIVE 0 @@ -459,8 +460,8 @@ static int compute_context_model_diff(const VP9_COMMON *const cm) { #endif // !CONFIG_REALTIME_ONLY // Test for whether to calculate metrics for the frame. 
-static int is_psnr_calc_enabled(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; +static int is_psnr_calc_enabled(const VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame; @@ -822,8 +823,28 @@ static void setup_frame(VP9_COMP *cpi) { // layer ARF case as well. if (cpi->multi_layer_arf && !cpi->use_svc) { GF_GROUP *const gf_group = &cpi->twopass.gf_group; - cm->frame_context_idx = clamp(gf_group->layer_depth[gf_group->index] - 1, 0, - FRAME_CONTEXTS - 1); + const int gf_group_index = gf_group->index; + const int boost_frame = + !cpi->rc.is_src_frame_alt_ref && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame); + + // frame_context_idx Frame Type + // 0 Intra only frame, base layer ARF + // 1 ARFs with layer depth = 2,3 + // 2 ARFs with layer depth > 3 + // 3 Non-boosted frames + if (frame_is_intra_only(cm)) { + cm->frame_context_idx = 0; + } else if (boost_frame) { + if (gf_group->rf_level[gf_group_index] == GF_ARF_STD) + cm->frame_context_idx = 0; + else if (gf_group->layer_depth[gf_group_index] <= 3) + cm->frame_context_idx = 1; + else + cm->frame_context_idx = 2; + } else { + cm->frame_context_idx = 3; + } } if (cm->frame_type == KEY_FRAME) { @@ -1436,7 +1457,6 @@ static void init_level_constraint(LevelConstraint *lc) { lc->level_index = -1; lc->max_cpb_size = INT_MAX; lc->max_frame_size = INT_MAX; - lc->rc_config_updated = 0; lc->fail_flag = 0; } @@ -1448,7 +1468,7 @@ static void set_level_constraint(LevelConstraint *ls, int8_t level_index) { } } -static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { +static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; cpi->oxcf = *oxcf; @@ -1513,13 +1533,15 @@ static void set_rc_buffer_sizes(RATE_CONTROL *rc, } #if CONFIG_VP9_HIGHBITDEPTH +// TODO(angiebird): make sdx8f available for highbitdepth if 
needed #define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \ cpi->fn_ptr[BT].sdf = SDF; \ cpi->fn_ptr[BT].sdaf = SDAF; \ cpi->fn_ptr[BT].vf = VF; \ cpi->fn_ptr[BT].svf = SVF; \ cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; + cpi->fn_ptr[BT].sdx4df = SDX4DF; \ + cpi->fn_ptr[BT].sdx8f = NULL; #define MAKE_BFP_SAD_WRAPPER(fnname) \ static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ @@ -2137,7 +2159,112 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { } while (++i <= MV_MAX); } -VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, +static void init_ref_frame_bufs(VP9_COMMON *cm) { + int i; + BufferPool *const pool = cm->buffer_pool; + cm->new_fb_idx = INVALID_IDX; + for (i = 0; i < REF_FRAMES; ++i) { + cm->ref_frame_map[i] = INVALID_IDX; + } + for (i = 0; i < FRAME_BUFFERS; ++i) { + pool->frame_bufs[i].ref_count = 0; + } +} + +static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth, + int subsampling_x, int subsampling_y) { + VP9_COMMON *const cm = &cpi->common; +#if !CONFIG_VP9_HIGHBITDEPTH + (void)use_highbitdepth; + assert(use_highbitdepth == 0); +#endif + + if (!cpi->initial_width || +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth != use_highbitdepth || +#endif + cm->subsampling_x != subsampling_x || + cm->subsampling_y != subsampling_y) { + cm->subsampling_x = subsampling_x; + cm->subsampling_y = subsampling_y; +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth = use_highbitdepth; +#endif + alloc_util_frame_buffers(cpi); + cpi->initial_width = cm->width; + cpi->initial_height = cm->height; + cpi->initial_mbs = cm->MBs; + } +} + +// TODO(angiebird): Check whether we can move this function to vpx_image.c +static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt, + unsigned int *subsampling_x, + unsigned int *subsampling_y) { + switch (fmt) { + case VPX_IMG_FMT_I420: + case VPX_IMG_FMT_YV12: + case VPX_IMG_FMT_I422: + case VPX_IMG_FMT_I42016: + case VPX_IMG_FMT_I42216: *subsampling_x = 1; break; + 
default: *subsampling_x = 0; break; + } + + switch (fmt) { + case VPX_IMG_FMT_I420: + case VPX_IMG_FMT_I440: + case VPX_IMG_FMT_YV12: + case VPX_IMG_FMT_I42016: + case VPX_IMG_FMT_I44016: *subsampling_y = 1; break; + default: *subsampling_y = 0; break; + } +} + +// TODO(angiebird): Check whether we can move this function to vpx_image.c +static INLINE int vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) { + return fmt & VPX_IMG_FMT_HIGHBITDEPTH; +} + +#if CONFIG_VP9_TEMPORAL_DENOISING +static void setup_denoiser_buffer(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if (cpi->oxcf.noise_sensitivity > 0 && + !cpi->denoiser.frame_buffer_initialized) { + if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc, + cpi->oxcf.noise_sensitivity, cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_ENC_BORDER_IN_PIXELS)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate denoiser"); + } +} +#endif + +void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) { + const VP9EncoderConfig *oxcf = &cpi->oxcf; + unsigned int subsampling_x, subsampling_y; + const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt); + vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y); + + update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); +#if CONFIG_VP9_TEMPORAL_DENOISING + setup_denoiser_buffer(cpi); +#endif + + assert(cpi->lookahead == NULL); + cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x, + subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + use_highbitdepth, +#endif + oxcf->lag_in_frames); + alloc_raw_frame_buffers(cpi); +} + +VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf, BufferPool *const pool) { unsigned int i; VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP)); @@ -2170,10 +2297,13 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, 
cpi->resize_buffer_underflow = 0; cpi->use_skin_detection = 0; cpi->common.buffer_pool = pool; + init_ref_frame_bufs(cm); cpi->force_update_segmentation = 0; init_config(cpi, oxcf); + cpi->frame_info = vp9_get_frame_info(oxcf); + vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc); cm->current_video_frame = 0; @@ -2341,6 +2471,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, const int layer_id = (int)last_packet_for_layer->spatial_layer_id; const int packets_in_layer = (int)last_packet_for_layer->count + 1; if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) { + int num_frames; LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id]; vpx_free(lc->rc_twopass_stats_in.buf); @@ -2352,6 +2483,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, lc->twopass.stats_in = lc->twopass.stats_in_start; lc->twopass.stats_in_end = lc->twopass.stats_in_start + packets_in_layer - 1; + // Note the last packet is cumulative first pass stats. + // So the number of frames is packet number minus one + num_frames = packets_in_layer - 1; + fps_init_first_pass_info(&lc->twopass.first_pass_info, + lc->rc_twopass_stats_in.buf, num_frames); stats_copy[layer_id] = lc->rc_twopass_stats_in.buf; } } @@ -2367,6 +2503,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, vp9_init_second_pass_spatial_svc(cpi); } else { + int num_frames; #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { const size_t psz = cpi->common.MBs * sizeof(uint8_t); @@ -2383,6 +2520,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; cpi->twopass.stats_in = cpi->twopass.stats_in_start; cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; + // Note the last packet is cumulative first pass stats. 
+ // So the number of frames is packet number minus one + num_frames = packets - 1; + fps_init_first_pass_info(&cpi->twopass.first_pass_info, + oxcf->two_pass_stats_in.buf, num_frames); vp9_init_second_pass(cpi); } @@ -2409,7 +2551,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->kmeans_data_arr_alloc = 0; #if CONFIG_NON_GREEDY_MV - cpi->feature_score_loc_alloc = 0; cpi->tpl_ready = 0; #endif // CONFIG_NON_GREEDY_MV for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL; @@ -2418,62 +2559,67 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff))); cpi->source_var_thresh = 0; cpi->frames_till_next_var_check = 0; +#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \ + cpi->fn_ptr[BT].sdf = SDF; \ + cpi->fn_ptr[BT].sdaf = SDAF; \ + cpi->fn_ptr[BT].vf = VF; \ + cpi->fn_ptr[BT].svf = SVF; \ + cpi->fn_ptr[BT].svaf = SVAF; \ + cpi->fn_ptr[BT].sdx4df = SDX4DF; \ + cpi->fn_ptr[BT].sdx8f = SDX8F; -#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \ - cpi->fn_ptr[BT].sdf = SDF; \ - cpi->fn_ptr[BT].sdaf = SDAF; \ - cpi->fn_ptr[BT].vf = VF; \ - cpi->fn_ptr[BT].svf = SVF; \ - cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; - + // TODO(angiebird): make sdx8f available for every block size BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16, vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16, - vpx_sad32x16x4d) + vpx_sad32x16x4d, NULL) BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32, vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32, - vpx_sad16x32x4d) + vpx_sad16x32x4d, NULL) BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32, vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32, - vpx_sad64x32x4d) + vpx_sad64x32x4d, NULL) BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64, vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64, - 
vpx_sad32x64x4d) + vpx_sad32x64x4d, NULL) BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32, vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32, - vpx_sad32x32x4d) + vpx_sad32x32x4d, vpx_sad32x32x8) BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64, vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64, - vpx_sad64x64x4d) + vpx_sad64x64x4d, NULL) BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16, vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16, - vpx_sad16x16x4d) + vpx_sad16x16x4d, vpx_sad16x16x8) BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8, vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8, - vpx_sad16x8x4d) + vpx_sad16x8x4d, vpx_sad16x8x8) BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16, vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16, - vpx_sad8x16x4d) + vpx_sad8x16x4d, vpx_sad8x16x8) BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8, - vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d) + vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d, + vpx_sad8x8x8) BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4, - vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d) + vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d, + NULL) BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8, - vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d) + vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d, + NULL) BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4, - vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d) + vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d, + vpx_sad4x4x8) #if CONFIG_VP9_HIGHBITDEPTH highbd_set_var_fns(cpi); @@ -2501,6 +2647,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cm->error.setjmp = 0; +#if CONFIG_RATE_CTRL + 
encode_command_init(&cpi->encode_command); +#endif + return cpi; } @@ -2511,9 +2661,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) #endif // CONFIG_INTERNAL_STATS +static void free_tpl_buffer(VP9_COMP *cpi); + void vp9_remove_compressor(VP9_COMP *cpi) { VP9_COMMON *cm; - unsigned int i, frame; + unsigned int i; int t; if (!cpi) return; @@ -2586,9 +2738,16 @@ void vp9_remove_compressor(VP9_COMP *cpi) { SNPRINT2(results, "\t%7.3f", cpi->worst_consistency); } - fprintf(f, "%s\t Time\tRcErr\tAbsErr\n", headings); - fprintf(f, "%s\t%8.0f\t%7.2f\t%7.2f\n", results, total_encode_time, - rate_err, fabs(rate_err)); + SNPRINT(headings, "\t Time\tRcErr\tAbsErr"); + SNPRINT2(results, "\t%8.0f", total_encode_time); + SNPRINT2(results, "\t%7.2f", rate_err); + SNPRINT2(results, "\t%7.2f", fabs(rate_err)); + + fprintf(f, "%s\tAPsnr611\n", headings); + fprintf( + f, "%s\t%7.3f\n", results, + (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) / + (cpi->count * 8)); } fclose(f); @@ -2618,27 +2777,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vpx_free(cpi->kmeans_data_arr); } -#if CONFIG_NON_GREEDY_MV - vpx_free(cpi->feature_score_loc_arr); - vpx_free(cpi->feature_score_loc_sort); - vpx_free(cpi->feature_score_loc_heap); - vpx_free(cpi->select_mv_arr); -#endif - for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { -#if CONFIG_NON_GREEDY_MV - int rf_idx; - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { - int sqr_bsize; - for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) { - vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]); - } - vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); - vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]); - } -#endif - vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); - cpi->tpl_stats[frame].is_valid = 0; - } + free_tpl_buffer(cpi); for (t = 0; t < cpi->num_workers; ++t) { VPxWorker *const worker = &cpi->workers[t]; @@ -2719,30 +2858,19 @@ void 
vp9_remove_compressor(VP9_COMP *cpi) { #endif } -static void generate_psnr_packet(VP9_COMP *cpi) { - struct vpx_codec_cx_pkt pkt; - int i; - PSNR_STATS psnr; +int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) { + if (is_psnr_calc_enabled(cpi)) { #if CONFIG_VP9_HIGHBITDEPTH - vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr, - cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); + vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr, + cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); #else - vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr); + vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr); #endif - - for (i = 0; i < 4; ++i) { - pkt.data.psnr.samples[i] = psnr.samples[i]; - pkt.data.psnr.sse[i] = psnr.sse[i]; - pkt.data.psnr.psnr[i] = psnr.psnr[i]; + return 1; + } else { + vp9_zero(*psnr); + return 0; } - pkt.kind = VPX_CODEC_PSNR_PKT; - if (cpi->use_svc) - cpi->svc - .layer_context[cpi->svc.spatial_layer_id * - cpi->svc.number_temporal_layers] - .psnr_pkt = pkt.data.psnr; - else - vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); } int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) { @@ -3572,29 +3700,12 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index, vpx_calloc(cpi->un_scaled_source->y_width, sizeof(*cpi->common.postproc_state.limits)); } - vp9_denoise(cpi->Source, cpi->Source, l, cpi->common.postproc_state.limits); + vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l, + cpi->common.postproc_state.limits); } #endif // CONFIG_VP9_POSTPROC } -#if CONFIG_VP9_TEMPORAL_DENOISING -static void setup_denoiser_buffer(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if (cpi->oxcf.noise_sensitivity > 0 && - !cpi->denoiser.frame_buffer_initialized) { - if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc, - cpi->oxcf.noise_sensitivity, cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if 
CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate denoiser"); - } -} -#endif - static void init_motion_estimation(VP9_COMP *cpi) { int y_stride = cpi->scaled_source.y_stride; @@ -4175,6 +4286,14 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, vp9_scale_references(cpi); } +#if CONFIG_RATE_CTRL + // TODO(angiebird): This is a hack for making sure the encoder use the + // external_quantize_index exactly. Avoid this kind of hack later. + if (cpi->encode_command.use_external_quantize_index) { + q = cpi->encode_command.external_quantize_index; + } +#endif + vp9_set_quantizer(cm, q); if (loop_count == 0) setup_frame(cpi); @@ -4213,6 +4332,16 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; } +#if CONFIG_RATE_CTRL + // This part needs to be after save_coding_context() because + // restore_coding_context will be called in the end of this function. + // TODO(angiebird): This is a hack for making sure the encoder use the + // external_quantize_index exactly. Avoid this kind of hack later. 
+ if (cpi->encode_command.use_external_quantize_index) { + break; + } +#endif + if (oxcf->rc_mode == VPX_Q) { loop = 0; } else { @@ -4389,7 +4518,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, } if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) - if (loop || !enable_acl) restore_coding_context(cpi); + if (loop) restore_coding_context(cpi); } while (loop); #ifdef AGGRESSIVE_VBR @@ -4415,13 +4544,11 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, // Skip recoding, if model diff is below threshold const int thresh = compute_context_model_thresh(cpi); const int diff = compute_context_model_diff(cm); - if (diff < thresh) { - vpx_clear_system_state(); - restore_coding_context(cpi); - return; + if (diff >= thresh) { + vp9_encode_frame(cpi); } - - vp9_encode_frame(cpi); + } + if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { vpx_clear_system_state(); restore_coding_context(cpi); } @@ -4756,17 +4883,6 @@ static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) { } } -// Implementation and modifications of C. Yeo, H. L. Tan, and Y. H. Tan, "On -// rate distortion optimization using SSIM," Circuits and Systems for Video -// Technology, IEEE Transactions on, vol. 23, no. 7, pp. 1170-1181, 2013. -// SSIM_VAR_SCALE defines the strength of the bias towards SSIM in RDO. 
-// Some sample values are: -// (for midres test set) -// SSIM_VAR_SCALE avg_psnr ssim ms_ssim -// 8.0 9.421 -5.537 -6.898 -// 16.0 4.703 -5.378 -6.238 -// 32.0 1.929 -4.308 -4.807 -#define SSIM_VAR_SCALE 16.0 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; ThreadData *td = &cpi->td; @@ -4783,19 +4899,6 @@ static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) { double log_sum = 0.0; int row, col; -#if CONFIG_VP9_HIGHBITDEPTH - double c2; - if (xd->bd == 10) { - c2 = 941.8761; // (.03*1023)^2 - } else if (xd->bd == 12) { - c2 = 15092.1225; // (.03*4095)^2 - } else { - c2 = 58.5225; // (.03*255)^2 - } -#else - const double c2 = 58.5225; // (.03*255)^2 -#endif - // Loop through each 64x64 block. for (row = 0; row < num_rows; ++row) { for (col = 0; col < num_cols; ++col) { @@ -4817,19 +4920,22 @@ static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) { // In order to make SSIM_VAR_SCALE in a same scale for both 8 bit // and high bit videos, the variance needs to be divided by 2.0 or // 64.0 separately. + // TODO(sdeng): need to tune for 12bit videos. #if CONFIG_VP9_HIGHBITDEPTH if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) - var += - vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd) / 2.0; + var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd); else #endif - var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8) / 64.0; + var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8); num_of_var += 1.0; } } - var = var / num_of_var / SSIM_VAR_SCALE; - var = 2.0 * var + c2; + var = var / num_of_var / 64.0; + + // Curve fitting with an exponential model on all 16x16 blocks from the + // Midres dataset. + var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222; cpi->mi_ssim_rdmult_scaling_factors[index] = var; log_sum += log(var); } @@ -4976,12 +5082,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, TX_SIZE t; // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0. 
- // If in constrained layer drop mode (svc.framedrop_mode != LAYER_DROP) and - // base spatial layer was dropped, no need to set svc.skip_enhancement_layer, - // as whole superframe will be dropped. + // No need to set svc.skip_enhancement_layer if whole superframe will be + // dropped. if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 && cpi->oxcf.target_bandwidth == 0 && !(cpi->svc.framedrop_mode != LAYER_DROP && + (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP || + cpi->svc + .force_drop_constrained_from_above[cpi->svc.number_spatial_layers - + 1]) && cpi->svc.drop_spatial_layer[0])) { cpi->svc.skip_enhancement_layer = 1; vp9_rc_postencode_update_drop_frame(cpi); @@ -4989,17 +5098,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, cpi->last_frame_dropped = 1; cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1; cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1; - if (cpi->svc.framedrop_mode == LAYER_DROP || - cpi->svc.drop_spatial_layer[0] == 0) { - // For the case of constrained drop mode where the base is dropped - // (drop_spatial_layer[0] == 1), which means full superframe dropped, - // we don't increment the svc frame counters. In particular temporal - // layer counter (which is incremented in vp9_inc_frame_in_layer()) - // won't be incremented, so on a dropped frame we try the same - // temporal_layer_id on next incoming frame. This is to avoid an - // issue with temporal alignement with full superframe dropping. 
- vp9_inc_frame_in_layer(cpi); - } + vp9_inc_frame_in_layer(cpi); return; } @@ -5285,54 +5384,9 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, mismatch_move_frame_idx_w(); #endif encode_frame_to_data_rate(cpi, size, dest, frame_flags); - - vp9_twopass_postencode_update(cpi); } #endif // !CONFIG_REALTIME_ONLY -static void init_ref_frame_bufs(VP9_COMMON *cm) { - int i; - BufferPool *const pool = cm->buffer_pool; - cm->new_fb_idx = INVALID_IDX; - for (i = 0; i < REF_FRAMES; ++i) { - cm->ref_frame_map[i] = INVALID_IDX; - } - for (i = 0; i < FRAME_BUFFERS; ++i) { - pool->frame_bufs[i].ref_count = 0; - } -} - -static void check_initial_width(VP9_COMP *cpi, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int subsampling_x, int subsampling_y) { - VP9_COMMON *const cm = &cpi->common; - - if (!cpi->initial_width || -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth != use_highbitdepth || -#endif - cm->subsampling_x != subsampling_x || - cm->subsampling_y != subsampling_y) { - cm->subsampling_x = subsampling_x; - cm->subsampling_y = subsampling_y; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = use_highbitdepth; -#endif - - alloc_raw_frame_buffers(cpi); - init_ref_frame_bufs(cm); - alloc_util_frame_buffers(cpi); - - init_motion_estimation(cpi); // TODO(agrange) This can be removed. 
- - cpi->initial_width = cm->width; - cpi->initial_height = cm->height; - cpi->initial_mbs = cm->MBs; - } -} - int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { @@ -5343,30 +5397,21 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags, const int subsampling_y = sd->subsampling_y; #if CONFIG_VP9_HIGHBITDEPTH const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0; -#endif - -#if CONFIG_VP9_HIGHBITDEPTH - check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); #else - check_initial_width(cpi, subsampling_x, subsampling_y); -#endif // CONFIG_VP9_HIGHBITDEPTH - -#if CONFIG_VP9_HIGHBITDEPTH - // Disable denoiser for high bitdepth since vp9_denoiser_filter only works for - // 8 bits. - if (cm->bit_depth > 8) cpi->oxcf.noise_sensitivity = 0; + const int use_highbitdepth = 0; #endif + update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); #if CONFIG_VP9_TEMPORAL_DENOISING setup_denoiser_buffer(cpi); #endif + + alloc_raw_frame_buffers(cpi); + vpx_usec_timer_start(&timer); if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, -#if CONFIG_VP9_HIGHBITDEPTH - use_highbitdepth, -#endif // CONFIG_VP9_HIGHBITDEPTH - frame_flags)) + use_highbitdepth, frame_flags)) res = -1; vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); @@ -5867,18 +5912,89 @@ static void init_tpl_stats(VP9_COMP *cpi) { } #if CONFIG_NON_GREEDY_MV -static uint32_t motion_compensated_prediction( - VP9_COMP *cpi, ThreadData *td, int frame_idx, uint8_t *cur_frame_buf, - uint8_t *ref_frame_buf, int stride, BLOCK_SIZE bsize, int mi_row, - int mi_col, MV *mv, int rf_idx) { -#else // CONFIG_NON_GREEDY_MV +static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td, + MotionField *motion_field, + int frame_idx, uint8_t *cur_frame_buf, + uint8_t *ref_frame_buf, int stride, + BLOCK_SIZE bsize, int 
mi_row, + int mi_col, MV *mv) { + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; + int step_param; + uint32_t bestsme = UINT_MAX; + const MvLimits tmp_mv_limits = x->mv_limits; + // lambda is used to adjust the importance of motion vector consitency. + // TODO(angiebird): Figure out lambda's proper value. + const int lambda = cpi->tpl_stats[frame_idx].lambda; + int_mv nb_full_mvs[NB_MVS_NUM]; + int nb_full_mv_num; + + MV best_ref_mv1 = { 0, 0 }; + MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ + + best_ref_mv1_full.col = best_ref_mv1.col >> 3; + best_ref_mv1_full.row = best_ref_mv1.row >> 3; + + // Setup frame pointers + x->plane[0].src.buf = cur_frame_buf; + x->plane[0].src.stride = stride; + xd->plane[0].pre[0].buf = ref_frame_buf; + xd->plane[0].pre[0].stride = stride; + + step_param = mv_sf->reduce_first_step_size; + step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); + + vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); + + nb_full_mv_num = + vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs); + vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param, + lambda, 1, nb_full_mvs, nb_full_mv_num, mv); + + /* restore UMV window */ + x->mv_limits = tmp_mv_limits; + + return bestsme; +} + +static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td, + uint8_t *cur_frame_buf, + uint8_t *ref_frame_buf, int stride, + BLOCK_SIZE bsize, MV *mv) { + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; + uint32_t bestsme = UINT_MAX; + uint32_t distortion; + uint32_t sse; + int cost_list[5]; + + MV best_ref_mv1 = { 0, 0 }; + + // Setup frame pointers + x->plane[0].src.buf = cur_frame_buf; + x->plane[0].src.stride = stride; + xd->plane[0].pre[0].buf = ref_frame_buf; + xd->plane[0].pre[0].stride = stride; + + // TODO(yunqing): may use higher tap interp filter than 2 taps. 
+ // Ignore mv costing by sending NULL pointer instead of cost array + bestsme = cpi->find_fractional_mv_step( + x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, + &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level, + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0, + USE_2_TAPS); + + return bestsme; +} + +#else // CONFIG_NON_GREEDY_MV static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, - int frame_idx, uint8_t *cur_frame_buf, uint8_t *ref_frame_buf, int stride, BLOCK_SIZE bsize, - int mi_row, int mi_col, MV *mv) { -#endif // CONFIG_NON_GREEDY_MV + MV *mv) { MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; @@ -5890,12 +6006,6 @@ static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, uint32_t sse; int cost_list[5]; const MvLimits tmp_mv_limits = x->mv_limits; -#if CONFIG_NON_GREEDY_MV - // lambda is used to adjust the importance of motion vector consitency. - // TODO(angiebird): Figure out lambda's proper value. 
- const int lambda = cpi->tpl_stats[frame_idx].lambda; - int_mv nb_full_mvs[NB_MVS_NUM]; -#endif MV best_ref_mv1 = { 0, 0 }; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ @@ -5914,21 +6024,9 @@ static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); -#if CONFIG_NON_GREEDY_MV - (void)search_method; - (void)sadpb; - vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx, - bsize, nb_full_mvs); - vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1, - &cpi->fn_ptr[bsize], nb_full_mvs, NB_MVS_NUM, mv); -#else - (void)frame_idx; - (void)mi_row; - (void)mi_col; vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param, search_method, sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1, mv, 0, 0); -#endif /* restore UMV window */ x->mv_limits = tmp_mv_limits; @@ -5943,6 +6041,7 @@ static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, return bestsme; } +#endif static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row, int ref_pos_col, int block, BLOCK_SIZE bsize) { @@ -6224,19 +6323,22 @@ static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, set_mv_limits(cm, x, mi_row, mi_col); - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { int_mv mv; +#if CONFIG_NON_GREEDY_MV + MotionField *motion_field; +#endif if (ref_frame[rf_idx] == NULL) continue; #if CONFIG_NON_GREEDY_MV (void)td; - mv.as_int = - get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_int; + motion_field = vp9_motion_field_info_get_motion_field( + &cpi->motion_field_info, frame_idx, rf_idx, bsize); + mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col); #else - motion_compensated_prediction( - cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset, - ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize, - mi_row, 
mi_col, &mv.as_mv); + motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset, + ref_frame[rf_idx]->y_buffer + mb_y_offset, + xd->cur_buf->y_stride, bsize, &mv.as_mv); #endif #if CONFIG_VP9_HIGHBITDEPTH @@ -6378,8 +6480,9 @@ static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame, } static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi, - TplDepFrame *tpl_frame, int rf_idx, - BLOCK_SIZE bsize, int mi_row, int mi_col) { + MotionField *motion_field, + TplDepFrame *tpl_frame, BLOCK_SIZE bsize, + int mi_row, int mi_col) { int_mv mv; switch (mv_mode) { case ZERO_MV_MODE: @@ -6387,7 +6490,7 @@ static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi, mv.as_mv.col = 0; break; case NEW_MV_MODE: - mv = *get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col); + mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col); break; case NEAREST_MV_MODE: mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col); @@ -6404,15 +6507,16 @@ static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi, } static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd, - GF_PICTURE *gf_picture, int frame_idx, - TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, - int mi_row, int mi_col, int_mv *mv) { + GF_PICTURE *gf_picture, MotionField *motion_field, + int frame_idx, TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int_mv *mv) { uint32_t sse; struct buf_2d src; struct buf_2d pre; MV full_mv; - *mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row, - mi_col); + *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize, + mi_row, mi_col); full_mv = get_full_mv(&mv->as_mv); if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col, &src, &pre)) { @@ -6449,18 +6553,18 @@ static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) { mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT); return mv_diff_cost; } -static double get_mv_cost(int 
mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame, - int rf_idx, BLOCK_SIZE bsize, int mi_row, +static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field, + TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row, int mi_col) { double mv_cost = get_mv_mode_cost(mv_mode); if (mv_mode == NEW_MV_MODE) { - MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, - mi_row, mi_col) + MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, + bsize, mi_row, mi_col) .as_mv; - MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, tpl_frame, rf_idx, - bsize, mi_row, mi_col) + MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field, + tpl_frame, bsize, mi_row, mi_col) .as_mv; - MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, tpl_frame, rf_idx, + MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col) .as_mv; double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv); @@ -6471,21 +6575,24 @@ static double get_mv_cost(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame, } static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x, - GF_PICTURE *gf_picture, int frame_idx, - TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, - int mi_row, int mi_col, int_mv *mv) { + GF_PICTURE *gf_picture, MotionField *motion_field, + int frame_idx, TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int_mv *mv) { MACROBLOCKD *xd = &x->e_mbd; - double mv_dist = get_mv_dist(mv_mode, cpi, xd, gf_picture, frame_idx, - tpl_frame, rf_idx, bsize, mi_row, mi_col, mv); + double mv_dist = + get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx, + tpl_frame, rf_idx, bsize, mi_row, mi_col, mv); double mv_cost = - get_mv_cost(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row, mi_col); + get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col); double mult = 180; return mv_cost + mult * log2f(1 + mv_dist); } static int 
find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, - GF_PICTURE *gf_picture, int frame_idx, + GF_PICTURE *gf_picture, + MotionField *motion_field, int frame_idx, TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, int mi_row, int mi_col, double *rd, int_mv *mv) { @@ -6499,8 +6606,8 @@ static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (mv_mode == NEW_MV_MODE) { continue; } - this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, frame_idx, tpl_frame, - rf_idx, bsize, mi_row, mi_col, &this_mv); + this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx, + tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv); if (update == 0) { *rd = this_rd; *mv = this_mv; @@ -6518,8 +6625,8 @@ static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, } static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, - GF_PICTURE *gf_picture, int frame_idx, - TplDepFrame *tpl_frame, int rf_idx, + GF_PICTURE *gf_picture, MotionField *motion_field, + int frame_idx, TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, int mi_row, int mi_col) { const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; @@ -6549,9 +6656,9 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { double this_rd; int_mv *mv = &select_mv_arr[nb_row * stride + nb_col]; - mv_mode_arr[nb_row * stride + nb_col] = - find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, - rf_idx, bsize, nb_row, nb_col, &this_rd, mv); + mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode( + cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx, + bsize, nb_row, nb_col, &this_rd, mv); if (r == 0 && c == 0) { this_no_new_mv_rd = this_rd; } @@ -6565,9 +6672,9 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, // new mv mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE; - this_new_mv_rd = eval_mv_mode(NEW_MV_MODE, cpi, x, gf_picture, 
frame_idx, - tpl_frame, rf_idx, bsize, mi_row, mi_col, - &select_mv_arr[mi_row * stride + mi_col]); + this_new_mv_rd = eval_mv_mode( + NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, + rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]); new_mv_rd = this_new_mv_rd; // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE // beforehand. @@ -6580,9 +6687,9 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { double this_rd; int_mv *mv = &select_mv_arr[nb_row * stride + nb_col]; - mv_mode_arr[nb_row * stride + nb_col] = - find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, - rf_idx, bsize, nb_row, nb_col, &this_rd, mv); + mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode( + cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx, + bsize, nb_row, nb_col, &this_rd, mv); new_mv_rd += this_rd; } } @@ -6612,7 +6719,8 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, } static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x, - GF_PICTURE *gf_picture, int frame_idx, + GF_PICTURE *gf_picture, + MotionField *motion_field, int frame_idx, TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize) { const int mi_height = num_8x8_blocks_high_lookup[bsize]; @@ -6631,160 +6739,40 @@ static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x, assert(c >= 0 && c < unit_cols); assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows); assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols); - predict_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, rf_idx, bsize, - mi_row, mi_col); + predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, + rf_idx, bsize, mi_row, mi_col); } } } -static double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows, - int cols) { - double IxIx = 0; - double IxIy = 0; - double IyIy = 0; - double score; - int r, c; - vpx_clear_system_state(); - for (r = 0; r + 1 < rows; ++r) { 
- for (c = 0; c + 1 < cols; ++c) { - int diff_x = buf[r * stride + c] - buf[r * stride + c + 1]; - int diff_y = buf[r * stride + c] - buf[(r + 1) * stride + c]; - IxIx += diff_x * diff_x; - IxIy += diff_x * diff_y; - IyIy += diff_y * diff_y; - } - } - IxIx /= (rows - 1) * (cols - 1); - IxIy /= (rows - 1) * (cols - 1); - IyIy /= (rows - 1) * (cols - 1); - score = (IxIx * IyIy - IxIy * IxIy + 0.0001) / (IxIx + IyIy + 0.0001); - return score; -} - -static int compare_feature_score(const void *a, const void *b) { - const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a; - const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b; - if (aa->feature_score < bb->feature_score) { - return 1; - } else if (aa->feature_score > bb->feature_score) { - return -1; - } else { - return 0; - } -} - -static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx, - YV12_BUFFER_CONFIG **ref_frame, BLOCK_SIZE bsize, +static void do_motion_search(VP9_COMP *cpi, ThreadData *td, + MotionField *motion_field, int frame_idx, + YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize, int mi_row, int mi_col) { VP9_COMMON *cm = &cpi->common; MACROBLOCK *x = &td->mb; MACROBLOCKD *xd = &x->e_mbd; - TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; - TplDepStats *tpl_stats = - &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; const int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; - int rf_idx; - + assert(ref_frame != NULL); set_mv_limits(cm, x, mi_row, mi_col); - - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { - int_mv *mv = get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col); - if (ref_frame[rf_idx] == NULL) { - tpl_stats->ready[rf_idx] = 0; - continue; - } else { - tpl_stats->ready[rf_idx] = 1; - } - motion_compensated_prediction( - cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset, - ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize, - mi_row, mi_col, &mv->as_mv, rf_idx); - } -} - -#define 
CHANGE_MV_SEARCH_ORDER 1 -#define USE_PQSORT 1 - -#if CHANGE_MV_SEARCH_ORDER -#if USE_PQSORT -static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size, - FEATURE_SCORE_LOC **output) { - if (*size > 0) { - *output = heap[0]; - --*size; - if (*size > 0) { - int p, l, r; - heap[0] = heap[*size]; - p = 0; - l = 2 * p + 1; - r = 2 * p + 2; - while (l < *size) { - FEATURE_SCORE_LOC *tmp; - int c = l; - if (r < *size && heap[r]->feature_score > heap[l]->feature_score) { - c = r; - } - if (heap[p]->feature_score >= heap[c]->feature_score) { - break; - } - tmp = heap[p]; - heap[p] = heap[c]; - heap[c] = tmp; - p = c; - l = 2 * p + 1; - r = 2 * p + 2; - } - } - } else { - assert(0); - } -} - -static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size, - FEATURE_SCORE_LOC *input) { - int c, p; - FEATURE_SCORE_LOC *tmp; - input->visited = 1; - heap[*size] = input; - ++*size; - c = *size - 1; - p = c >> 1; - while (c > 0 && heap[c]->feature_score > heap[p]->feature_score) { - tmp = heap[p]; - heap[p] = heap[c]; - heap[c] = tmp; - c = p; - p >>= 1; - } -} - -static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame, - BLOCK_SIZE bsize, int mi_row, int mi_col, - int *heap_size) { - const int mi_unit = num_8x8_blocks_wide_lookup[bsize]; - const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; - int i; - for (i = 0; i < NB_MVS_NUM; ++i) { - int r = dirs[i][0] * mi_unit; - int c = dirs[i][1] * mi_unit; - if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 && - mi_col + c < tpl_frame->mi_cols) { - FEATURE_SCORE_LOC *fs_loc = - &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride + - (mi_col + c)]; - if (fs_loc->visited == 0) { - max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc); - } - } - } -} -#endif // USE_PQSORT -#endif // CHANGE_MV_SEARCH_ORDER - -static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx, - YV12_BUFFER_CONFIG *ref_frame[3], - BLOCK_SIZE bsize) { + { + 
int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col); + uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset; + uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset; + const int stride = xd->cur_buf->y_stride; + full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf, + ref_frame_buf, stride, bsize, mi_row, mi_col, + &mv.as_mv); + sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride, + bsize, &mv.as_mv); + vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv); + } +} + +static void build_motion_field( + VP9_COMP *cpi, int frame_idx, + YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) { VP9_COMMON *cm = &cpi->common; ThreadData *td = &cpi->td; TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; @@ -6792,79 +6780,26 @@ static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx, const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int pw = num_4x4_blocks_wide_lookup[bsize] << 2; const int ph = num_4x4_blocks_high_lookup[bsize] << 2; - int fs_loc_sort_size; - int fs_loc_heap_size; int mi_row, mi_col; + int rf_idx; tpl_frame->lambda = (pw * ph) >> 2; assert(pw * ph == tpl_frame->lambda << 2); - fs_loc_sort_size = 0; - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { - const int mb_y_offset = - mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; - const int bw = 4 << b_width_log2_lookup[bsize]; - const int bh = 4 << b_height_log2_lookup[bsize]; - TplDepStats *tpl_stats = - &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; - FEATURE_SCORE_LOC *fs_loc = - &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col]; - tpl_stats->feature_score = get_feature_score( - xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh); - fs_loc->visited = 0; - fs_loc->feature_score = tpl_stats->feature_score; - fs_loc->mi_row = mi_row; - fs_loc->mi_col = 
mi_col; - cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc; - ++fs_loc_sort_size; - } - } - - qsort(cpi->feature_score_loc_sort, fs_loc_sort_size, - sizeof(*cpi->feature_score_loc_sort), compare_feature_score); - - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { - int rf_idx; - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { - TplDepStats *tpl_stats = - &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; - tpl_stats->ready[rf_idx] = 0; - } + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + MotionField *motion_field = vp9_motion_field_info_get_motion_field( + &cpi->motion_field_info, frame_idx, rf_idx, bsize); + if (ref_frame[rf_idx] == NULL) { + continue; } - } - -#if CHANGE_MV_SEARCH_ORDER -#if !USE_PQSORT - for (i = 0; i < fs_loc_sort_size; ++i) { - FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i]; - do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, - fs_loc->mi_col); - } -#else // !USE_PQSORT - fs_loc_heap_size = 0; - max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size, - cpi->feature_score_loc_sort[0]); - - while (fs_loc_heap_size > 0) { - FEATURE_SCORE_LOC *fs_loc; - max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc); - - do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, - fs_loc->mi_col); - - add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col, - &fs_loc_heap_size); - } -#endif // !USE_PQSORT -#else // CHANGE_MV_SEARCH_ORDER - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { - do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col); + vp9_motion_field_reset_mvs(motion_field); + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { + do_motion_search(cpi, td, motion_field, frame_idx, 
ref_frame[rf_idx], + bsize, mi_row, mi_col); + } } } -#endif // CHANGE_MV_SEARCH_ORDER } #endif // CONFIG_NON_GREEDY_MV @@ -6872,7 +6807,7 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, BLOCK_SIZE bsize) { TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame; - YV12_BUFFER_CONFIG *ref_frame[3] = { NULL, NULL, NULL }; + YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL }; VP9_COMMON *cm = &cpi->common; struct scale_factors sf; @@ -6922,7 +6857,7 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, // Prepare reference frame pointers. If any reference frame slot is // unavailable, the pointer will be set to Null. - for (idx = 0; idx < 3; ++idx) { + for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) { int rf_idx = gf_picture[frame_idx].ref_frame[idx]; if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame; } @@ -6945,13 +6880,15 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES; ++square_block_idx) { BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx); - build_motion_field(cpi, xd, frame_idx, ref_frame, square_bsize); + build_motion_field(cpi, frame_idx, ref_frame, square_bsize); } - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; if (ref_frame_idx != -1) { - predict_mv_mode_arr(cpi, x, gf_picture, frame_idx, tpl_frame, rf_idx, - bsize); + MotionField *motion_field = vp9_motion_field_info_get_motion_field( + &cpi->motion_field_info, frame_idx, rf_idx, bsize); + predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx, + tpl_frame, rf_idx, bsize); } } #endif @@ -7001,7 +6938,7 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames, const VP9_COMMON *cm = &cpi->common; int rf_idx; for 
(frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) { - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; int mi_row, mi_col; int ref_frame_idx; @@ -7022,8 +6959,9 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames, for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) { for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) { - int_mv mv = - *get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col); + int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info, + frame_idx, rf_idx, bsize, + mi_row, mi_col); printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row, mv.as_mv.col); } @@ -7067,26 +7005,8 @@ static void init_tpl_buffer(VP9_COMP *cpi) { const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); #if CONFIG_NON_GREEDY_MV - int sqr_bsize; int rf_idx; - // TODO(angiebird): This probably needs further modifications to support - // frame scaling later on. - if (cpi->feature_score_loc_alloc == 0) { - // The smallest block size of motion field is 4x4, but the mi_unit is 8x8, - // therefore the number of units is "mi_rows * mi_cols * 4" here. 
- CHECK_MEM_ERROR( - cm, cpi->feature_score_loc_arr, - vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->feature_score_loc_arr))); - CHECK_MEM_ERROR(cm, cpi->feature_score_loc_sort, - vpx_calloc(mi_rows * mi_cols * 4, - sizeof(*cpi->feature_score_loc_sort))); - CHECK_MEM_ERROR(cm, cpi->feature_score_loc_heap, - vpx_calloc(mi_rows * mi_cols * 4, - sizeof(*cpi->feature_score_loc_heap))); - - cpi->feature_score_loc_alloc = 1; - } vpx_free(cpi->select_mv_arr); CHECK_MEM_ERROR( cm, cpi->select_mv_arr, @@ -7101,16 +7021,7 @@ static void init_tpl_buffer(VP9_COMP *cpi) { continue; #if CONFIG_NON_GREEDY_MV - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { - for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) { - vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]); - CHECK_MEM_ERROR( - cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize], - vpx_calloc( - mi_rows * mi_cols * 4, - sizeof( - *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]))); - } + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); CHECK_MEM_ERROR( cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx], @@ -7141,6 +7052,25 @@ static void init_tpl_buffer(VP9_COMP *cpi) { } } +static void free_tpl_buffer(VP9_COMP *cpi) { + int frame; +#if CONFIG_NON_GREEDY_MV + vp9_free_motion_field_info(&cpi->motion_field_info); + vpx_free(cpi->select_mv_arr); +#endif + for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { +#if CONFIG_NON_GREEDY_MV + int rf_idx; + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); + vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]); + } +#endif + vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); + cpi->tpl_stats[frame].is_valid = 0; + } +} + static void setup_tpl_stats(VP9_COMP *cpi) { GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE]; const GF_GROUP *gf_group = &cpi->twopass.gf_group; @@ -7165,9 +7095,39 @@ static void setup_tpl_stats(VP9_COMP *cpi) { #endif 
// CONFIG_NON_GREEDY_MV } +static void init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) { + encode_frame_result->show_idx = -1; // Actual encoding doesn't happen. +} + +#if !CONFIG_REALTIME_ONLY +static void update_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result, + int show_idx, + FRAME_UPDATE_TYPE update_type, + const YV12_BUFFER_CONFIG *source_frame, + const YV12_BUFFER_CONFIG *coded_frame, + int quantize_index, uint32_t bit_depth, + uint32_t input_bit_depth) { + PSNR_STATS psnr; +#if CONFIG_VP9_HIGHBITDEPTH + vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth, + input_bit_depth); +#else + (void)bit_depth; + (void)input_bit_depth; + vpx_calc_psnr(source_frame, coded_frame, &psnr); +#endif + encode_frame_result->psnr = psnr.psnr[0]; + encode_frame_result->sse = psnr.sse[0]; + encode_frame_result->show_idx = show_idx; + encode_frame_result->update_type = update_type; + encode_frame_result->quantize_index = quantize_index; +} +#endif // !CONFIG_REALTIME_ONLY + int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, - int64_t *time_end, int flush) { + int64_t *time_end, int flush, + ENCODE_FRAME_RESULT *encode_frame_result) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; VP9_COMMON *const cm = &cpi->common; BufferPool *const pool = cm->buffer_pool; @@ -7179,6 +7139,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, int arf_src_index; const int gf_group_index = cpi->twopass.gf_group.index; int i; + init_encode_frame_result(encode_frame_result); if (is_one_pass_cbr_svc(cpi)) { vp9_one_pass_cbr_svc_start_layer(cpi); @@ -7284,10 +7245,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (source != NULL) { cm->show_frame = 1; cm->intra_only = 0; - // if the flags indicate intra frame, but if the current picture is for - // non-zero spatial layer, it should not be an intra picture. 
+ // If the flags indicate intra frame, but if the current picture is for + // spatial layer above first_spatial_layer_to_encode, it should not be an + // intra picture. if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc && - cpi->svc.spatial_layer_id > 0) { + cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) { source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF); } @@ -7313,12 +7275,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0; } else { *size = 0; -#if !CONFIG_REALTIME_ONLY - if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) { - vp9_end_first_pass(cpi); /* get last stats packet */ - cpi->twopass.first_pass_done = 1; - } -#endif // !CONFIG_REALTIME_ONLY return -1; } @@ -7389,6 +7345,19 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->kmeans_data_arr_alloc = 1; } +#if CONFIG_NON_GREEDY_MV + { + const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); + Status status = vp9_alloc_motion_field_info( + &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols); + if (status == STATUS_FAILED) { + vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR, + "vp9_alloc_motion_field_info failed"); + } + } +#endif // CONFIG_NON_GREEDY_MV + if (gf_group_index == 1 && cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE && cpi->sf.enable_tpl_model) { @@ -7432,6 +7401,25 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, vp9_first_pass(cpi, source); } else if (oxcf->pass == 2 && !cpi->use_svc) { Pass2Encode(cpi, size, dest, frame_flags); + // update_encode_frame_result() depends on twopass.gf_group.index and + // cm->new_fb_idx and cpi->Source are updated for current properly and have + // not been updated for the next frame yet. + // The update locations are as follows. 
+ // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero() + // for the first frame in the gf_group and is updated for the next frame at + // vp9_twopass_postencode_update(). + // 2) cpi->Source is updated at the beginging of this function, i.e. + // vp9_get_compressed_data() + // 3) cm->new_fb_idx is updated at the beginging of this function by + // get_free_fb(cm) + // TODO(angiebird): Improve the codebase to make the update of frame + // dependent variables more robust. + update_encode_frame_result( + encode_frame_result, source->show_idx, + cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index], + cpi->Source, get_frame_new_buffer(cm), vp9_get_quantizer(cpi), + cpi->oxcf.input_bit_depth, cm->bit_depth); + vp9_twopass_postencode_update(cpi); } else if (cpi->use_svc) { SvcEncode(cpi, size, dest, frame_flags); } else { @@ -7464,9 +7452,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, vpx_usec_timer_mark(&cmptimer); cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer); - // Should we calculate metrics for the frame. 
- if (is_psnr_calc_enabled(cpi)) generate_psnr_packet(cpi); - if (cpi->keep_level_stats && oxcf->pass != 1) update_level_info(cpi, size, arf_src_index); @@ -7703,15 +7688,15 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, unsigned int height) { VP9_COMMON *cm = &cpi->common; #if CONFIG_VP9_HIGHBITDEPTH - check_initial_width(cpi, cm->use_highbitdepth, 1, 1); + update_initial_width(cpi, cm->use_highbitdepth, 1, 1); #else - check_initial_width(cpi, 1, 1); + update_initial_width(cpi, 0, 1, 1); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_TEMPORAL_DENOISING setup_denoiser_buffer(cpi); #endif - + alloc_raw_frame_buffers(cpi); if (width) { cm->width = width; if (cm->width > cpi->initial_width) { @@ -7740,7 +7725,7 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) { return; } -int vp9_get_quantizer(VP9_COMP *cpi) { return cpi->common.base_qindex; } +int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; } void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) { if (flags & diff --git a/libvpx/vp9/encoder/vp9_encoder.h b/libvpx/vp9/encoder/vp9_encoder.h index f157fdfc5..0a8623efb 100644 --- a/libvpx/vp9/encoder/vp9_encoder.h +++ b/libvpx/vp9/encoder/vp9_encoder.h @@ -20,8 +20,10 @@ #include "vpx_dsp/ssim.h" #endif #include "vpx_dsp/variance.h" +#include "vpx_dsp/psnr.h" #include "vpx_ports/system_state.h" #include "vpx_util/vpx_thread.h" +#include "vpx_util/vpx_timestamp.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_ppflags.h" @@ -152,7 +154,10 @@ typedef struct VP9EncoderConfig { int height; // height of data passed to the compressor unsigned int input_bit_depth; // Input bit depth. 
double init_framerate; // set to passed in framerate - int64_t target_bandwidth; // bandwidth to be used in bits per second + vpx_rational_t g_timebase; // equivalent to g_timebase in vpx_codec_enc_cfg_t + vpx_rational64_t g_timebase_in_ts; // g_timebase * TICKS_PER_SEC + + int64_t target_bandwidth; // bandwidth to be used in bits per second int noise_sensitivity; // pre processing blur: recommendation 0 int sharpness; // sharpening output: recommendation 0: @@ -259,7 +264,6 @@ typedef struct VP9EncoderConfig { unsigned int target_level; vpx_fixed_buf_t two_pass_stats_in; - struct vpx_codec_pkt_list *output_pkt_list; #if CONFIG_FP_MB_STATS vpx_fixed_buf_t firstpass_mb_stats_in; @@ -293,16 +297,9 @@ typedef struct TplDepStats { int ref_frame_index; int_mv mv; - -#if CONFIG_NON_GREEDY_MV - int ready[3]; - int64_t sse_arr[3]; - double feature_score; -#endif } TplDepStats; #if CONFIG_NON_GREEDY_MV -#define SQUARE_BLOCK_SIZES 4 #define ZERO_MV_MODE 0 #define NEW_MV_MODE 1 @@ -322,54 +319,11 @@ typedef struct TplDepFrame { int base_qindex; #if CONFIG_NON_GREEDY_MV int lambda; - int_mv *pyramid_mv_arr[3][SQUARE_BLOCK_SIZES]; int *mv_mode_arr[3]; double *rd_diff_arr[3]; #endif } TplDepFrame; -#if CONFIG_NON_GREEDY_MV -static INLINE int get_square_block_idx(BLOCK_SIZE bsize) { - if (bsize == BLOCK_4X4) { - return 0; - } - if (bsize == BLOCK_8X8) { - return 1; - } - if (bsize == BLOCK_16X16) { - return 2; - } - if (bsize == BLOCK_32X32) { - return 3; - } - assert(0 && "ERROR: non-square block size"); - return -1; -} - -static INLINE BLOCK_SIZE square_block_idx_to_bsize(int square_block_idx) { - if (square_block_idx == 0) { - return BLOCK_4X4; - } - if (square_block_idx == 1) { - return BLOCK_8X8; - } - if (square_block_idx == 2) { - return BLOCK_16X16; - } - if (square_block_idx == 3) { - return BLOCK_32X32; - } - assert(0 && "ERROR: invalid square_block_idx"); - return BLOCK_INVALID; -} - -static INLINE int_mv *get_pyramid_mv(const TplDepFrame *tpl_frame, int rf_idx, - 
BLOCK_SIZE bsize, int mi_row, int mi_col) { - return &tpl_frame->pyramid_mv_arr[rf_idx][get_square_block_idx(bsize)] - [mi_row * tpl_frame->stride + mi_col]; -} -#endif - #define TPL_DEP_COST_SCALE_LOG2 4 // TODO(jingning) All spatially adaptive variables should go to TileDataEnc. @@ -533,7 +487,6 @@ typedef enum { typedef struct { int8_t level_index; - uint8_t rc_config_updated; uint8_t fail_flag; int max_frame_size; // in bits double max_cpb_size; // in bits @@ -555,15 +508,6 @@ typedef struct EncFrameBuf { // Maximum operating frame buffer size needed for a GOP using ARF reference. #define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS) -#if CONFIG_NON_GREEDY_MV -typedef struct FEATURE_SCORE_LOC { - int visited; - double feature_score; - int mi_row; - int mi_col; -} FEATURE_SCORE_LOC; -#endif - #define MAX_KMEANS_GROUPS 8 typedef struct KMEANS_DATA { @@ -572,7 +516,33 @@ typedef struct KMEANS_DATA { int group_idx; } KMEANS_DATA; +#if CONFIG_RATE_CTRL +typedef struct ENCODE_COMMAND { + int use_external_quantize_index; + int external_quantize_index; +} ENCODE_COMMAND; + +static INLINE void encode_command_init(ENCODE_COMMAND *encode_command) { + vp9_zero(*encode_command); + encode_command->use_external_quantize_index = 0; + encode_command->external_quantize_index = -1; +} + +static INLINE void encode_command_set_external_quantize_index( + ENCODE_COMMAND *encode_command, int quantize_index) { + encode_command->use_external_quantize_index = 1; + encode_command->external_quantize_index = quantize_index; +} + +static INLINE void encode_command_reset_external_quantize_index( + ENCODE_COMMAND *encode_command) { + encode_command->use_external_quantize_index = 0; + encode_command->external_quantize_index = -1; +} +#endif // CONFIG_RATE_CTRL + typedef struct VP9_COMP { + FRAME_INFO frame_info; QUANTS quants; ThreadData td; MB_MODE_INFO_EXT *mbmi_ext_base; @@ -611,11 +581,8 @@ typedef struct VP9_COMP { int kmeans_count_ls[MAX_KMEANS_GROUPS]; int kmeans_ctr_num; #if 
CONFIG_NON_GREEDY_MV + MotionFieldInfo motion_field_info; int tpl_ready; - int feature_score_loc_alloc; - FEATURE_SCORE_LOC *feature_score_loc_arr; - FEATURE_SCORE_LOC **feature_score_loc_sort; - FEATURE_SCORE_LOC **feature_score_loc_heap; int_mv *select_mv_arr; #endif @@ -878,11 +845,23 @@ typedef struct VP9_COMP { int multi_layer_arf; vpx_roi_map_t roi; +#if CONFIG_RATE_CTRL + ENCODE_COMMAND encode_command; +#endif } VP9_COMP; +typedef struct ENCODE_FRAME_RESULT { + int show_idx; + FRAME_UPDATE_TYPE update_type; + double psnr; + uint64_t sse; + int quantize_index; +} ENCODE_FRAME_RESULT; + void vp9_initialize_enc(void); -struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, +void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt); +struct VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf, BufferPool *const pool); void vp9_remove_compressor(VP9_COMP *cpi); @@ -896,7 +875,8 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags, int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, - int64_t *time_end, int flush); + int64_t *time_end, int flush, + ENCODE_FRAME_RESULT *encode_frame_result); int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *flags); @@ -948,7 +928,7 @@ static INLINE void stack_init(int *stack, int length) { for (idx = 0; idx < length; ++idx) stack[idx] = -1; } -int vp9_get_quantizer(struct VP9_COMP *cpi); +int vp9_get_quantizer(const VP9_COMP *cpi); static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) { return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || @@ -1121,6 +1101,8 @@ void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_set_row_mt(VP9_COMP *cpi); +int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr); + #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) #ifdef __cplusplus diff --git a/libvpx/vp9/encoder/vp9_firstpass.c 
b/libvpx/vp9/encoder/vp9_firstpass.c index e0acf563b..57ab583cf 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.c +++ b/libvpx/vp9/encoder/vp9_firstpass.c @@ -84,14 +84,8 @@ static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) { return 1; } -static void output_stats(FIRSTPASS_STATS *stats, - struct vpx_codec_pkt_list *pktlist) { - struct vpx_codec_cx_pkt pkt; - pkt.kind = VPX_CODEC_STATS_PKT; - pkt.data.twopass_stats.buf = stats; - pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS); - vpx_codec_pkt_list_add(pktlist, &pkt); - +static void output_stats(FIRSTPASS_STATS *stats) { + (void)stats; // TEMP debug code #if OUTPUT_FPF { @@ -220,14 +214,14 @@ static void subtract_stats(FIRSTPASS_STATS *section, // bars and partially discounts other 0 energy areas. #define MIN_ACTIVE_AREA 0.5 #define MAX_ACTIVE_AREA 1.0 -static double calculate_active_area(const VP9_COMP *cpi, +static double calculate_active_area(const FRAME_INFO *frame_info, const FIRSTPASS_STATS *this_frame) { double active_pct; active_pct = 1.0 - ((this_frame->intra_skip_pct / 2) + - ((this_frame->inactive_zone_rows * 2) / (double)cpi->common.mb_rows)); + ((this_frame->inactive_zone_rows * 2) / (double)frame_info->mb_rows)); return fclamp(active_pct, MIN_ACTIVE_AREA, MAX_ACTIVE_AREA); } @@ -260,17 +254,16 @@ static double calculate_mod_frame_score(const VP9_COMP *cpi, // remaining active MBs. The correction here assumes that coding // 0.5N blocks of complexity 2X is a little easier than coding N // blocks of complexity X. 
- modified_score *= - pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION); + modified_score *= pow(calculate_active_area(&cpi->frame_info, this_frame), + ACT_AREA_CORRECTION); return modified_score; } -static double calculate_norm_frame_score(const VP9_COMP *cpi, - const TWO_PASS *twopass, - const VP9EncoderConfig *oxcf, - const FIRSTPASS_STATS *this_frame, - const double av_err) { +static double calc_norm_frame_score(const VP9EncoderConfig *oxcf, + const FRAME_INFO *frame_info, + const FIRSTPASS_STATS *this_frame, + double mean_mod_score, double av_err) { double modified_score = av_err * pow(this_frame->coded_error * this_frame->weight / DOUBLE_DIVIDE_CHECK(av_err), @@ -285,14 +278,22 @@ static double calculate_norm_frame_score(const VP9_COMP *cpi, // 0.5N blocks of complexity 2X is a little easier than coding N // blocks of complexity X. modified_score *= - pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION); + pow(calculate_active_area(frame_info, this_frame), ACT_AREA_CORRECTION); // Normalize to a midpoint score. - modified_score /= DOUBLE_DIVIDE_CHECK(twopass->mean_mod_score); - + modified_score /= DOUBLE_DIVIDE_CHECK(mean_mod_score); return fclamp(modified_score, min_score, max_score); } +static double calculate_norm_frame_score(const VP9_COMP *cpi, + const TWO_PASS *twopass, + const VP9EncoderConfig *oxcf, + const FIRSTPASS_STATS *this_frame, + const double av_err) { + return calc_norm_frame_score(oxcf, &cpi->frame_info, this_frame, + twopass->mean_mod_score, av_err); +} + // This function returns the maximum target rate per frame. 
static int frame_max_bits(const RATE_CONTROL *rc, const VP9EncoderConfig *oxcf) { @@ -312,7 +313,8 @@ void vp9_init_first_pass(VP9_COMP *cpi) { } void vp9_end_first_pass(VP9_COMP *cpi) { - output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); + output_stats(&cpi->twopass.total_stats); + cpi->twopass.first_pass_done = 1; vpx_free(cpi->twopass.fp_mb_float_stats); cpi->twopass.fp_mb_float_stats = NULL; } @@ -1421,7 +1423,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { // Don't want to do output stats with a stack variable! twopass->this_frame_stats = fps; - output_stats(&twopass->this_frame_stats, cpi->output_pkt_list); + output_stats(&twopass->this_frame_stats); accumulate_stats(&twopass->total_stats, &fps); #if CONFIG_FP_MB_STATS @@ -1747,15 +1749,16 @@ void vp9_init_second_pass(VP9_COMP *cpi) { #define LOW_CODED_ERR_PER_MB 10.0 #define NCOUNT_FRAME_II_THRESH 6.0 -static double get_sr_decay_rate(const VP9_COMP *cpi, +static double get_sr_decay_rate(const FRAME_INFO *frame_info, const FIRSTPASS_STATS *frame) { double sr_diff = (frame->sr_coded_error - frame->coded_error); double sr_decay = 1.0; double modified_pct_inter; double modified_pcnt_intra; const double motion_amplitude_part = - frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / - (cpi->initial_height + cpi->initial_width)); + frame->pcnt_motion * + ((frame->mvc_abs + frame->mvr_abs) / + (frame_info->frame_height + frame_info->frame_width)); modified_pct_inter = frame->pcnt_inter; if ((frame->coded_error > LOW_CODED_ERR_PER_MB) && @@ -1776,74 +1779,73 @@ static double get_sr_decay_rate(const VP9_COMP *cpi, // This function gives an estimate of how badly we believe the prediction // quality is decaying from frame to frame. 
-static double get_zero_motion_factor(const VP9_COMP *cpi, - const FIRSTPASS_STATS *frame) { - const double zero_motion_pct = frame->pcnt_inter - frame->pcnt_motion; - double sr_decay = get_sr_decay_rate(cpi, frame); +static double get_zero_motion_factor(const FRAME_INFO *frame_info, + const FIRSTPASS_STATS *frame_stats) { + const double zero_motion_pct = + frame_stats->pcnt_inter - frame_stats->pcnt_motion; + double sr_decay = get_sr_decay_rate(frame_info, frame_stats); return VPXMIN(sr_decay, zero_motion_pct); } #define ZM_POWER_FACTOR 0.75 -static double get_prediction_decay_rate(const VP9_COMP *cpi, - const FIRSTPASS_STATS *next_frame) { - const double sr_decay_rate = get_sr_decay_rate(cpi, next_frame); +static double get_prediction_decay_rate(const FRAME_INFO *frame_info, + const FIRSTPASS_STATS *frame_stats) { + const double sr_decay_rate = get_sr_decay_rate(frame_info, frame_stats); const double zero_motion_factor = - (0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion), + (0.95 * pow((frame_stats->pcnt_inter - frame_stats->pcnt_motion), ZM_POWER_FACTOR)); return VPXMAX(zero_motion_factor, (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor))); } +static int get_show_idx(const TWO_PASS *twopass) { + return (int)(twopass->stats_in - twopass->stats_in_start); +} // Function to test for a condition where a complex transition is followed // by a static section. For example in slide shows where there is a fade // between slides. This is to help with more optimal kf and gf positioning. -static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, - int still_interval, - double loop_decay_rate, - double last_decay_rate) { - TWO_PASS *const twopass = &cpi->twopass; - RATE_CONTROL *const rc = &cpi->rc; - - // Break clause to detect very still sections after motion - // For example a static image after a fade or other transition - // instead of a clean scene cut. 
- if (frame_interval > rc->min_gf_interval && loop_decay_rate >= 0.999 && - last_decay_rate < 0.9) { - int j; - - // Look ahead a few frames to see if static condition persists... - for (j = 0; j < still_interval; ++j) { - const FIRSTPASS_STATS *stats = &twopass->stats_in[j]; - if (stats >= twopass->stats_in_end) break; - - if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break; - } +static int check_transition_to_still(const FIRST_PASS_INFO *first_pass_info, + int show_idx, int still_interval) { + int j; + int num_frames = fps_get_num_frames(first_pass_info); + if (show_idx + still_interval > num_frames) { + return 0; + } - // Only if it does do we signal a transition to still. - return j == still_interval; + // Look ahead a few frames to see if static condition persists... + for (j = 0; j < still_interval; ++j) { + const FIRSTPASS_STATS *stats = + fps_get_frame_stats(first_pass_info, show_idx + j); + if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break; } - return 0; + // Only if it does do we signal a transition to still. + return j == still_interval; } // This function detects a flash through the high relative pcnt_second_ref // score in the frame following a flash frame. The offset passed in should // reflect this. -static int detect_flash(const TWO_PASS *twopass, int offset) { - const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset); - +static int detect_flash_from_frame_stats(const FIRSTPASS_STATS *frame_stats) { // What we are looking for here is a situation where there is a // brief break in prediction (such as a flash) but subsequent frames // are reasonably well predicted by an earlier (pre flash) frame. // The recovery after a flash is indicated by a high pcnt_second_ref // usage or a second ref coded error notably lower than the last // frame coded error. 
- return next_frame != NULL && - ((next_frame->sr_coded_error < next_frame->coded_error) || - ((next_frame->pcnt_second_ref > next_frame->pcnt_inter) && - (next_frame->pcnt_second_ref >= 0.5))); + if (frame_stats == NULL) { + return 0; + } + return (frame_stats->sr_coded_error < frame_stats->coded_error) || + ((frame_stats->pcnt_second_ref > frame_stats->pcnt_inter) && + (frame_stats->pcnt_second_ref >= 0.5)); +} + +static int detect_flash(const TWO_PASS *twopass, int offset) { + const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset); + return detect_flash_from_frame_stats(next_frame); } // Update the motion related elements to the GF arf boost calculation. @@ -1876,13 +1878,15 @@ static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats, #define BASELINE_ERR_PER_MB 12500.0 #define GF_MAX_BOOST 96.0 -static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame, +static double calc_frame_boost(const FRAME_INFO *frame_info, + const FIRSTPASS_STATS *this_frame, + int avg_frame_qindex, double this_frame_mv_in_out) { double frame_boost; - const double lq = vp9_convert_qindex_to_q( - cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth); + const double lq = + vp9_convert_qindex_to_q(avg_frame_qindex, frame_info->bit_depth); const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5); - const double active_area = calculate_active_area(cpi, this_frame); + const double active_area = calculate_active_area(frame_info, this_frame); // Underlying boost factor is based on inter error ratio. 
frame_boost = (BASELINE_ERR_PER_MB * active_area) / @@ -1921,7 +1925,8 @@ static double calc_kf_frame_boost(VP9_COMP *cpi, const double lq = vp9_convert_qindex_to_q( cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth); const double boost_q_correction = VPXMIN((0.50 + (lq * 0.015)), 2.00); - const double active_area = calculate_active_area(cpi, this_frame); + const double active_area = + calculate_active_area(&cpi->frame_info, this_frame); // Underlying boost factor is based on inter error ratio. frame_boost = (kf_err_per_mb(cpi) * active_area) / @@ -1946,8 +1951,10 @@ static double calc_kf_frame_boost(VP9_COMP *cpi, return VPXMIN(frame_boost, max_boost * boost_q_correction); } -static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { - TWO_PASS *const twopass = &cpi->twopass; +static int compute_arf_boost(const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, + int arf_show_idx, int f_frames, int b_frames, + int avg_frame_qindex) { int i; double boost_score = 0.0; double mv_ratio_accumulator = 0.0; @@ -1960,7 +1967,10 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { // Search forward from the proposed arf/next gf position. for (i = 0; i < f_frames; ++i) { - const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i); + const FIRSTPASS_STATS *this_frame = + fps_get_frame_stats(first_pass_info, arf_show_idx + i); + const FIRSTPASS_STATS *next_frame = + fps_get_frame_stats(first_pass_info, arf_show_idx + i + 1); if (this_frame == NULL) break; // Update the motion related elements to the boost calculation. @@ -1970,17 +1980,19 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { // We want to discount the flash frame itself and the recovery // frame that follows as both will have poor scores. 
- flash_detected = detect_flash(twopass, i) || detect_flash(twopass, i + 1); + flash_detected = detect_flash_from_frame_stats(this_frame) || + detect_flash_from_frame_stats(next_frame); // Accumulate the effect of prediction quality decay. if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(cpi, this_frame); + decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } - boost_score += decay_accumulator * - calc_frame_boost(cpi, this_frame, this_frame_mv_in_out); + boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame, + avg_frame_qindex, + this_frame_mv_in_out); } arf_boost = (int)boost_score; @@ -1995,7 +2007,10 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { // Search backward towards last gf position. for (i = -1; i >= -b_frames; --i) { - const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i); + const FIRSTPASS_STATS *this_frame = + fps_get_frame_stats(first_pass_info, arf_show_idx + i); + const FIRSTPASS_STATS *next_frame = + fps_get_frame_stats(first_pass_info, arf_show_idx + i + 1); if (this_frame == NULL) break; // Update the motion related elements to the boost calculation. @@ -2005,17 +2020,19 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { // We want to discount the flash frame itself and the recovery // frame that follows as both will have poor scores. - flash_detected = detect_flash(twopass, i) || detect_flash(twopass, i + 1); + flash_detected = detect_flash_from_frame_stats(this_frame) || + detect_flash_from_frame_stats(next_frame); // Cumulative effect of prediction quality decay. if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(cpi, this_frame); + decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? 
MIN_DECAY_FACTOR : decay_accumulator; } - boost_score += decay_accumulator * - calc_frame_boost(cpi, this_frame, this_frame_mv_in_out); + boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame, + avg_frame_qindex, + this_frame_mv_in_out); } arf_boost += (int)boost_score; @@ -2026,6 +2043,15 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { return arf_boost; } +static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { + const FRAME_INFO *frame_info = &cpi->frame_info; + TWO_PASS *const twopass = &cpi->twopass; + const int avg_inter_frame_qindex = cpi->rc.avg_frame_qindex[INTER_FRAME]; + int arf_show_idx = get_show_idx(twopass); + return compute_arf_boost(frame_info, &twopass->first_pass_info, arf_show_idx, + f_frames, b_frames, avg_inter_frame_qindex); +} + // Calculate a section intra ratio used in setting max loop filter. static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin, const FIRSTPASS_STATS *end, @@ -2060,8 +2086,19 @@ static int64_t calculate_total_gf_group_bits(VP9_COMP *cpi, // Calculate the bits to be allocated to the group as a whole. 
if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0.0)) { + int key_frame_interval = rc->frames_since_key + rc->frames_to_key; + int distance_from_next_key_frame = + rc->frames_to_key - + (rc->baseline_gf_interval + rc->source_alt_ref_pending); + int max_gf_bits_bias = rc->avg_frame_bandwidth; + double gf_interval_bias_bits_normalize_factor = + (double)rc->baseline_gf_interval / 16; total_group_bits = (int64_t)(twopass->kf_group_bits * (gf_group_err / twopass->kf_group_error_left)); + // TODO(ravi): Experiment with different values of max_gf_bits_bias + total_group_bits += + (int64_t)((double)distance_from_next_key_frame / key_frame_interval * + max_gf_bits_bias * gf_interval_bias_bits_normalize_factor); } else { total_group_bits = 0; } @@ -2415,194 +2452,94 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise, #define ARF_ABS_ZOOM_THRESH 4.0 #define MAX_GF_BOOST 5400 -static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - VP9_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - VP9EncoderConfig *const oxcf = &cpi->oxcf; - TWO_PASS *const twopass = &cpi->twopass; - FIRSTPASS_STATS next_frame; - const FIRSTPASS_STATS *const start_pos = twopass->stats_in; - int i; - double gf_group_err = 0.0; - double gf_group_raw_error = 0.0; - double gf_group_noise = 0.0; - double gf_group_skip_pct = 0.0; - double gf_group_inactive_zone_rows = 0.0; - double gf_group_inter = 0.0; - double gf_group_motion = 0.0; - double gf_first_frame_err = 0.0; - double mod_frame_err = 0.0; +typedef struct RANGE { + int min; + int max; +} RANGE; - double mv_ratio_accumulator = 0.0; - double zero_motion_accumulator = 1.0; +static int get_gop_coding_frame_num( + int *use_alt_ref, const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc, + int gf_start_show_idx, const RANGE *active_gf_interval, + double gop_intra_factor, int lag_in_frames) { double loop_decay_rate = 1.00; - double 
last_loop_decay_rate = 1.00; - + double mv_ratio_accumulator = 0.0; double this_frame_mv_in_out = 0.0; double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; - double mv_ratio_accumulator_thresh; - double abs_mv_in_out_thresh; double sr_accumulator = 0.0; - const double av_err = get_distribution_av_err(cpi, twopass); - unsigned int allow_alt_ref = is_altref_enabled(cpi); - - int flash_detected; - int active_max_gf_interval; - int active_min_gf_interval; - int64_t gf_group_bits; - int gf_arf_bits; - const int is_key_frame = frame_is_intra_only(cm); - const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active; - int is_alt_ref_flash = 0; - - double gop_intra_factor = 1.0; - int gop_frames; - - // Reset the GF group data structures unless this is a key - // frame in which case it will already have been done. - if (is_key_frame == 0) { - vp9_zero(twopass->gf_group); - } - - vpx_clear_system_state(); - vp9_zero(next_frame); - - // Load stats for the current frame. - mod_frame_err = - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); - - // Note the error of the frame at the start of the group. This will be - // the GF frame error if we code a normal gf. - gf_first_frame_err = mod_frame_err; - - // If this is a key frame or the overlay from a previous arf then - // the error score / cost of this frame has already been accounted for. - if (arf_active_or_kf) { - gf_group_err -= gf_first_frame_err; - gf_group_raw_error -= this_frame->coded_error; - gf_group_noise -= this_frame->frame_noise_energy; - gf_group_skip_pct -= this_frame->intra_skip_pct; - gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows; - gf_group_inter -= this_frame->pcnt_inter; - gf_group_motion -= this_frame->pcnt_motion; - } - // Motion breakout threshold for loop below depends on image size. 
- mv_ratio_accumulator_thresh = - (cpi->initial_height + cpi->initial_width) / 4.0; - abs_mv_in_out_thresh = ARF_ABS_ZOOM_THRESH; - - // Set a maximum and minimum interval for the GF group. - // If the image appears almost completely static we can extend beyond this. - { - int int_max_q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality, - cpi->common.bit_depth)); - int q_term = (cm->current_video_frame == 0) - ? int_max_q / 32 - : (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex, - cpi->common.bit_depth) / - 6); - active_min_gf_interval = - rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200); - active_min_gf_interval = - VPXMIN(active_min_gf_interval, rc->max_gf_interval + arf_active_or_kf); - - // The value chosen depends on the active Q range. At low Q we have - // bits to spare and are better with a smaller interval and smaller boost. - // At high Q when there are few bits to spare we are better with a longer - // interval to spread the cost of the GF. - active_max_gf_interval = 11 + arf_active_or_kf + VPXMIN(5, q_term); - - // Force max GF interval to be odd. - active_max_gf_interval = active_max_gf_interval | 0x01; - - // We have: active_min_gf_interval <= - // rc->max_gf_interval + arf_active_or_kf. 
- if (active_max_gf_interval < active_min_gf_interval) { - active_max_gf_interval = active_min_gf_interval; - } else { - active_max_gf_interval = VPXMIN(active_max_gf_interval, - rc->max_gf_interval + arf_active_or_kf); + double mv_ratio_accumulator_thresh = + (frame_info->frame_height + frame_info->frame_width) / 4.0; + double zero_motion_accumulator = 1.0; + int gop_coding_frames; + + *use_alt_ref = 1; + gop_coding_frames = 0; + while (gop_coding_frames < rc->static_scene_max_gf_interval && + gop_coding_frames < rc->frames_to_key) { + const FIRSTPASS_STATS *next_next_frame; + const FIRSTPASS_STATS *next_frame; + int flash_detected; + ++gop_coding_frames; + + next_frame = fps_get_frame_stats(first_pass_info, + gf_start_show_idx + gop_coding_frames); + if (next_frame == NULL) { + break; } - // Would the active max drop us out just before the near the next kf? - if ((active_max_gf_interval <= rc->frames_to_key) && - (active_max_gf_interval >= (rc->frames_to_key - rc->min_gf_interval))) - active_max_gf_interval = rc->frames_to_key / 2; - } - active_max_gf_interval = - VPXMAX(active_max_gf_interval, active_min_gf_interval); - - if (cpi->multi_layer_arf) { - int layers = 0; - int max_layers = VPXMIN(MAX_ARF_LAYERS, cpi->oxcf.enable_auto_arf); - - // Adapt the intra_error factor to active_max_gf_interval limit. - for (i = active_max_gf_interval; i > 0; i >>= 1) ++layers; - - layers = VPXMIN(max_layers, layers); - gop_intra_factor += (layers * 0.25); - } - - i = 0; - while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) { - ++i; - - // Accumulate error score of frames in this gf group. 
- mod_frame_err = - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); - gf_group_err += mod_frame_err; - gf_group_raw_error += this_frame->coded_error; - gf_group_noise += this_frame->frame_noise_energy; - gf_group_skip_pct += this_frame->intra_skip_pct; - gf_group_inactive_zone_rows += this_frame->inactive_zone_rows; - gf_group_inter += this_frame->pcnt_inter; - gf_group_motion += this_frame->pcnt_motion; - - if (EOF == input_stats(twopass, &next_frame)) break; - // Test for the case where there is a brief flash but the prediction // quality back to an earlier frame is then restored. - flash_detected = detect_flash(twopass, 0); + next_next_frame = fps_get_frame_stats( + first_pass_info, gf_start_show_idx + gop_coding_frames + 1); + flash_detected = detect_flash_from_frame_stats(next_next_frame); // Update the motion related elements to the boost calculation. accumulate_frame_motion_stats( - &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, + next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); // Monitor for static sections. - if ((rc->frames_since_key + i - 1) > 1) { - zero_motion_accumulator = VPXMIN( - zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); + if ((rc->frames_since_key + gop_coding_frames - 1) > 1) { + zero_motion_accumulator = + VPXMIN(zero_motion_accumulator, + get_zero_motion_factor(frame_info, next_frame)); } // Accumulate the effect of prediction quality decay. if (!flash_detected) { - last_loop_decay_rate = loop_decay_rate; - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); + double last_loop_decay_rate = loop_decay_rate; + loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame); // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. 
- if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, - last_loop_decay_rate)) { - allow_alt_ref = 0; - break; + if (gop_coding_frames > rc->min_gf_interval && loop_decay_rate >= 0.999 && + last_loop_decay_rate < 0.9) { + int still_interval = 5; + if (check_transition_to_still(first_pass_info, + gf_start_show_idx + gop_coding_frames, + still_interval)) { + *use_alt_ref = 0; + break; + } } // Update the accumulator for second ref error difference. // This is intended to give an indication of how much the coded error is // increasing over time. - if (i == 1) { - sr_accumulator += next_frame.coded_error; + if (gop_coding_frames == 1) { + sr_accumulator += next_frame->coded_error; } else { - sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error); + sr_accumulator += + (next_frame->sr_coded_error - next_frame->coded_error); } } // Break out conditions. - // Break at maximum of active_max_gf_interval unless almost totally static. + // Break at maximum of active_gf_interval->max unless almost totally + // static. // // Note that the addition of a test of rc->source_alt_ref_active is // deliberate. The effect of this is that after a normal altref group even @@ -2612,59 +2549,230 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // such as a fade, the arf group spanning the transition may not be coded // at a very high quality and hence this frame (with its overlay) is a // poor golden frame to use for an extended group. - if (((i >= active_max_gf_interval) && - ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) || - ( - // Don't break out with a very short interval. 
- (i >= active_min_gf_interval) && - // If possible dont break very close to a kf - ((rc->frames_to_key - i) >= rc->min_gf_interval) && (i & 0x01) && - (!flash_detected) && - ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) || - (abs_mv_in_out_accumulator > abs_mv_in_out_thresh) || - (sr_accumulator > gop_intra_factor * next_frame.intra_error)))) { + if ((gop_coding_frames >= active_gf_interval->max) && + ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) { + break; + } + if ( + // Don't break out with a very short interval. + (gop_coding_frames >= active_gf_interval->min) && + // If possible dont break very close to a kf + ((rc->frames_to_key - gop_coding_frames) >= rc->min_gf_interval) && + (gop_coding_frames & 0x01) && (!flash_detected) && + ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) || + (abs_mv_in_out_accumulator > ARF_ABS_ZOOM_THRESH) || + (sr_accumulator > gop_intra_factor * next_frame->intra_error))) { break; } + } + *use_alt_ref &= zero_motion_accumulator < 0.995; + *use_alt_ref &= gop_coding_frames < lag_in_frames; + *use_alt_ref &= gop_coding_frames >= rc->min_gf_interval; + return gop_coding_frames; +} + +static RANGE get_active_gf_inverval_range( + const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf, + int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) { + RANGE active_gf_interval; +#if CONFIG_RATE_CTRL + (void)frame_info; + (void)gf_start_show_idx; + (void)active_worst_quality; + (void)last_boosted_qindex; + active_gf_interval.min = rc->min_gf_interval + arf_active_or_kf + 2; - *this_frame = next_frame; + active_gf_interval.max = 16 + arf_active_or_kf; + + if ((active_gf_interval.max <= rc->frames_to_key) && + (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) { + active_gf_interval.min = rc->frames_to_key / 2; + active_gf_interval.max = rc->frames_to_key / 2; + } +#else + int int_max_q = (int)(vp9_convert_qindex_to_q(active_worst_quality, + 
frame_info->bit_depth)); + int q_term = (gf_start_show_idx == 0) + ? int_max_q / 32 + : (int)(vp9_convert_qindex_to_q(last_boosted_qindex, + frame_info->bit_depth) / + 6); + active_gf_interval.min = + rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200); + active_gf_interval.min = + VPXMIN(active_gf_interval.min, rc->max_gf_interval + arf_active_or_kf); + + // The value chosen depends on the active Q range. At low Q we have + // bits to spare and are better with a smaller interval and smaller boost. + // At high Q when there are few bits to spare we are better with a longer + // interval to spread the cost of the GF. + active_gf_interval.max = 11 + arf_active_or_kf + VPXMIN(5, q_term); + + // Force max GF interval to be odd. + active_gf_interval.max = active_gf_interval.max | 0x01; + + // We have: active_gf_interval.min <= + // rc->max_gf_interval + arf_active_or_kf. + if (active_gf_interval.max < active_gf_interval.min) { + active_gf_interval.max = active_gf_interval.min; + } else { + active_gf_interval.max = + VPXMIN(active_gf_interval.max, rc->max_gf_interval + arf_active_or_kf); + } + + // Would the active max drop us out just before the near the next kf? 
+ if ((active_gf_interval.max <= rc->frames_to_key) && + (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) { + active_gf_interval.max = rc->frames_to_key / 2; + } + active_gf_interval.max = + VPXMAX(active_gf_interval.max, active_gf_interval.min); +#endif + return active_gf_interval; +} + +static int get_arf_layers(int multi_layer_arf, int max_layers, + int coding_frame_num) { + assert(max_layers <= MAX_ARF_LAYERS); + if (multi_layer_arf) { + int layers = 0; + int i; + for (i = coding_frame_num; i > 0; i >>= 1) { + ++layers; + } + layers = VPXMIN(max_layers, layers); + return layers; + } else { + return 1; + } +} + +static void define_gf_group(VP9_COMP *cpi, int gf_start_show_idx) { + VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; + VP9EncoderConfig *const oxcf = &cpi->oxcf; + TWO_PASS *const twopass = &cpi->twopass; + const FRAME_INFO *frame_info = &cpi->frame_info; + const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info; + const FIRSTPASS_STATS *const start_pos = twopass->stats_in; + int gop_coding_frames; + + double gf_group_err = 0.0; + double gf_group_raw_error = 0.0; + double gf_group_noise = 0.0; + double gf_group_skip_pct = 0.0; + double gf_group_inactive_zone_rows = 0.0; + double gf_group_inter = 0.0; + double gf_group_motion = 0.0; + + int allow_alt_ref = is_altref_enabled(cpi); + int use_alt_ref; + + int64_t gf_group_bits; + int gf_arf_bits; + const int is_key_frame = frame_is_intra_only(cm); + // If this is a key frame or the overlay from a previous arf then + // the error score / cost of this frame has already been accounted for. + const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active; + int is_alt_ref_flash = 0; + + double gop_intra_factor; + int gop_frames; + RANGE active_gf_interval; + + // Reset the GF group data structures unless this is a key + // frame in which case it will already have been done. 
+ if (is_key_frame == 0) { + vp9_zero(twopass->gf_group); + } + + vpx_clear_system_state(); + + active_gf_interval = get_active_gf_inverval_range( + frame_info, rc, arf_active_or_kf, gf_start_show_idx, + twopass->active_worst_quality, rc->last_boosted_qindex); + + if (cpi->multi_layer_arf) { + int arf_layers = get_arf_layers(cpi->multi_layer_arf, oxcf->enable_auto_arf, + active_gf_interval.max); + gop_intra_factor = 1.0 + 0.25 * arf_layers; + } else { + gop_intra_factor = 1.0; + } + + { + gop_coding_frames = get_gop_coding_frame_num( + &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx, + &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames); + use_alt_ref &= allow_alt_ref; } // Was the group length constrained by the requirement for a new KF? - rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0; + rc->constrained_gf_group = (gop_coding_frames >= rc->frames_to_key) ? 1 : 0; // Should we use the alternate reference frame. - if ((zero_motion_accumulator < 0.995) && allow_alt_ref && - (twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) && - (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) { - const int forward_frames = (rc->frames_to_key - i >= i - 1) - ? i - 1 - : VPXMAX(0, rc->frames_to_key - i); + if (use_alt_ref) { + const int f_frames = + (rc->frames_to_key - gop_coding_frames >= gop_coding_frames - 1) + ? gop_coding_frames - 1 + : VPXMAX(0, rc->frames_to_key - gop_coding_frames); + const int b_frames = gop_coding_frames - 1; + const int avg_inter_frame_qindex = rc->avg_frame_qindex[INTER_FRAME]; + // TODO(angiebird): figure out why arf's location is assigned this way + const int arf_show_idx = VPXMIN(gf_start_show_idx + gop_coding_frames + 1, + fps_get_num_frames(first_pass_info)); // Calculate the boost for alt ref. 
- rc->gfu_boost = calc_arf_boost(cpi, forward_frames, (i - 1)); + rc->gfu_boost = + compute_arf_boost(frame_info, first_pass_info, arf_show_idx, f_frames, + b_frames, avg_inter_frame_qindex); rc->source_alt_ref_pending = 1; } else { - reset_fpf_position(twopass, start_pos); - rc->gfu_boost = VPXMIN(MAX_GF_BOOST, calc_arf_boost(cpi, (i - 1), 0)); + const int f_frames = gop_coding_frames - 1; + const int b_frames = 0; + const int avg_inter_frame_qindex = rc->avg_frame_qindex[INTER_FRAME]; + // TODO(angiebird): figure out why arf's location is assigned this way + const int gld_show_idx = + VPXMIN(gf_start_show_idx + 1, fps_get_num_frames(first_pass_info)); + const int arf_boost = + compute_arf_boost(frame_info, first_pass_info, gld_show_idx, f_frames, + b_frames, avg_inter_frame_qindex); + rc->gfu_boost = VPXMIN(MAX_GF_BOOST, arf_boost); rc->source_alt_ref_pending = 0; } #define LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR 0.2 rc->arf_active_best_quality_adjustment_factor = 1.0; - if (rc->source_alt_ref_pending && !is_lossless_requested(&cpi->oxcf) && - rc->frames_to_key <= rc->arf_active_best_quality_adjustment_window) { - rc->arf_active_best_quality_adjustment_factor = - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR + - (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) * - (rc->frames_to_key - i) / - VPXMAX(1, (rc->arf_active_best_quality_adjustment_window - i)); + rc->arf_increase_active_best_quality = 0; + + if (!is_lossless_requested(&cpi->oxcf)) { + if (rc->frames_since_key >= rc->frames_to_key) { + // Increase the active best quality in the second half of key frame + // interval. 
+ rc->arf_active_best_quality_adjustment_factor = + LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR + + (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) * + (rc->frames_to_key - gop_coding_frames) / + (VPXMAX(1, ((rc->frames_to_key + rc->frames_since_key) / 2 - + gop_coding_frames))); + rc->arf_increase_active_best_quality = 1; + } else if ((rc->frames_to_key - gop_coding_frames) > 0) { + // Reduce the active best quality in the first half of key frame interval. + rc->arf_active_best_quality_adjustment_factor = + LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR + + (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) * + (rc->frames_since_key + gop_coding_frames) / + (VPXMAX(1, (rc->frames_to_key + rc->frames_since_key) / 2 + + gop_coding_frames)); + rc->arf_increase_active_best_quality = -1; + } } #ifdef AGGRESSIVE_VBR // Limit maximum boost based on interval length. - rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 140); + rc->gfu_boost = VPXMIN((int)rc->gfu_boost, gop_coding_frames * 140); #else - rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 200); + rc->gfu_boost = VPXMIN((int)rc->gfu_boost, gop_coding_frames * 200); #endif // Cap the ARF boost when perceptual quality AQ mode is enabled. This is @@ -2674,14 +2782,34 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (oxcf->aq_mode == PERCEPTUAL_AQ) rc->gfu_boost = VPXMIN(rc->gfu_boost, MIN_ARF_GF_BOOST); - rc->baseline_gf_interval = i - rc->source_alt_ref_pending; - - // Reset the file position. 
- reset_fpf_position(twopass, start_pos); + rc->baseline_gf_interval = gop_coding_frames - rc->source_alt_ref_pending; if (rc->source_alt_ref_pending) is_alt_ref_flash = detect_flash(twopass, rc->baseline_gf_interval); + { + const double av_err = get_distribution_av_err(cpi, twopass); + const double mean_mod_score = twopass->mean_mod_score; + // If the first frame is a key frame or the overlay from a previous arf then + // the error score / cost of this frame has already been accounted for. + int start_idx = arf_active_or_kf ? 1 : 0; + int j; + for (j = start_idx; j < gop_coding_frames; ++j) { + int show_idx = gf_start_show_idx + j; + const FIRSTPASS_STATS *frame_stats = + fps_get_frame_stats(first_pass_info, show_idx); + // Accumulate error score of frames in this gf group. + gf_group_err += calc_norm_frame_score(oxcf, frame_info, frame_stats, + mean_mod_score, av_err); + gf_group_raw_error += frame_stats->coded_error; + gf_group_noise += frame_stats->frame_noise_energy; + gf_group_skip_pct += frame_stats->intra_skip_pct; + gf_group_inactive_zone_rows += frame_stats->inactive_zone_rows; + gf_group_inter += frame_stats->pcnt_inter; + gf_group_motion += frame_stats->pcnt_motion; + } + } + // Calculate the bits to be allocated to the gf/arf group as a whole gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err); @@ -2847,17 +2975,23 @@ static int intra_step_transition(const FIRSTPASS_STATS *this_frame, // Test for very low intra complexity which could cause false key frames #define V_LOW_INTRA 0.5 -static int test_candidate_kf(TWO_PASS *twopass, - const FIRSTPASS_STATS *last_frame, - const FIRSTPASS_STATS *this_frame, - const FIRSTPASS_STATS *next_frame) { +static int test_candidate_kf(const FIRST_PASS_INFO *first_pass_info, + int show_idx) { + const FIRSTPASS_STATS *last_frame = + fps_get_frame_stats(first_pass_info, show_idx - 1); + const FIRSTPASS_STATS *this_frame = + fps_get_frame_stats(first_pass_info, show_idx); + const FIRSTPASS_STATS *next_frame = 
+ fps_get_frame_stats(first_pass_info, show_idx + 1); int is_viable_kf = 0; double pcnt_intra = 1.0 - this_frame->pcnt_inter; // Does the frame satisfy the primary criteria of a key frame? // See above for an explanation of the test criteria. // If so, then examine how well it predicts subsequent frames. - if (!detect_flash(twopass, -1) && !detect_flash(twopass, 0) && + detect_flash_from_frame_stats(next_frame); + if (!detect_flash_from_frame_stats(this_frame) && + !detect_flash_from_frame_stats(next_frame) && (this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) || (slide_transition(this_frame, last_frame, next_frame)) || @@ -2870,42 +3004,41 @@ static int test_candidate_kf(TWO_PASS *twopass, DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < KF_II_ERR_THRESHOLD)))) { int i; - const FIRSTPASS_STATS *start_pos = twopass->stats_in; - FIRSTPASS_STATS local_next_frame = *next_frame; double boost_score = 0.0; double old_boost_score = 0.0; double decay_accumulator = 1.0; // Examine how well the key frame predicts subsequent frames. for (i = 0; i < 16; ++i) { - double next_iiratio = (II_FACTOR * local_next_frame.intra_error / - DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)); + const FIRSTPASS_STATS *frame_stats = + fps_get_frame_stats(first_pass_info, show_idx + 1 + i); + double next_iiratio = (II_FACTOR * frame_stats->intra_error / + DOUBLE_DIVIDE_CHECK(frame_stats->coded_error)); if (next_iiratio > KF_II_MAX) next_iiratio = KF_II_MAX; // Cumulative effect of decay in prediction quality. - if (local_next_frame.pcnt_inter > 0.85) - decay_accumulator *= local_next_frame.pcnt_inter; + if (frame_stats->pcnt_inter > 0.85) + decay_accumulator *= frame_stats->pcnt_inter; else - decay_accumulator *= (0.85 + local_next_frame.pcnt_inter) / 2.0; + decay_accumulator *= (0.85 + frame_stats->pcnt_inter) / 2.0; // Keep a running total. boost_score += (decay_accumulator * next_iiratio); // Test various breakout clauses. 
- if ((local_next_frame.pcnt_inter < 0.05) || (next_iiratio < 1.5) || - (((local_next_frame.pcnt_inter - local_next_frame.pcnt_neutral) < - 0.20) && + if ((frame_stats->pcnt_inter < 0.05) || (next_iiratio < 1.5) || + (((frame_stats->pcnt_inter - frame_stats->pcnt_neutral) < 0.20) && (next_iiratio < 3.0)) || ((boost_score - old_boost_score) < 3.0) || - (local_next_frame.intra_error < V_LOW_INTRA)) { + (frame_stats->intra_error < V_LOW_INTRA)) { break; } old_boost_score = boost_score; // Get the next frame details - if (EOF == input_stats(twopass, &local_next_frame)) break; + if (show_idx + 1 + i == fps_get_num_frames(first_pass_info) - 1) break; } // If there is tolerable prediction for at least the next 3 frames then @@ -2913,9 +3046,6 @@ static int test_candidate_kf(TWO_PASS *twopass, if (boost_score > 30.0 && (i > 3)) { is_viable_kf = 1; } else { - // Reset the file position - reset_fpf_position(twopass, start_pos); - is_viable_kf = 0; } } @@ -2938,19 +3068,80 @@ static int test_candidate_kf(TWO_PASS *twopass, #define MAX_KF_TOT_BOOST 5400 #endif -static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - int i, j; +int vp9_get_frames_to_next_key(const VP9EncoderConfig *oxcf, + const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, + int kf_show_idx, int min_gf_interval) { + double recent_loop_decay[FRAMES_TO_CHECK_DECAY]; + int j; + int frames_to_key; + int max_frames_to_key = first_pass_info->num_frames - kf_show_idx; + max_frames_to_key = VPXMIN(max_frames_to_key, oxcf->key_freq); + + // Initialize the decay rates for the recent frames to check + for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0; + // Find the next keyframe. + if (!oxcf->auto_key) { + frames_to_key = max_frames_to_key; + } else { + frames_to_key = 1; + while (frames_to_key < max_frames_to_key) { + // Provided that we are not at the end of the file... 
+ if (kf_show_idx + frames_to_key + 1 < first_pass_info->num_frames) { + double loop_decay_rate; + double decay_accumulator; + const FIRSTPASS_STATS *next_frame = fps_get_frame_stats( + first_pass_info, kf_show_idx + frames_to_key + 1); + + // Check for a scene cut. + if (test_candidate_kf(first_pass_info, kf_show_idx + frames_to_key)) + break; + + // How fast is the prediction quality decaying? + loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame); + + // We want to know something about the recent past... rather than + // as used elsewhere where we are concerned with decay in prediction + // quality since the last GF or KF. + recent_loop_decay[(frames_to_key - 1) % FRAMES_TO_CHECK_DECAY] = + loop_decay_rate; + decay_accumulator = 1.0; + for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) + decay_accumulator *= recent_loop_decay[j]; + + // Special check for transition or high motion followed by a + // static scene. + if ((frames_to_key - 1) > min_gf_interval && loop_decay_rate >= 0.999 && + decay_accumulator < 0.9) { + int still_interval = oxcf->key_freq - (frames_to_key - 1); + // TODO(angiebird): Figure out why we use "+1" here + int show_idx = kf_show_idx + frames_to_key; + if (check_transition_to_still(first_pass_info, show_idx, + still_interval)) { + break; + } + } + } + ++frames_to_key; + } + } + return frames_to_key; +} + +static void find_next_key_frame(VP9_COMP *cpi, int kf_show_idx) { + int i; RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; GF_GROUP *const gf_group = &twopass->gf_group; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - const FIRSTPASS_STATS first_frame = *this_frame; + const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info; + const FRAME_INFO *frame_info = &cpi->frame_info; const FIRSTPASS_STATS *const start_position = twopass->stats_in; + const FIRSTPASS_STATS *keyframe_stats = + fps_get_frame_stats(first_pass_info, kf_show_idx); FIRSTPASS_STATS next_frame; - FIRSTPASS_STATS last_frame; 
int kf_bits = 0; int64_t max_kf_bits; - double decay_accumulator = 1.0; double zero_motion_accumulator = 1.0; double zero_motion_sum = 0.0; double zero_motion_avg; @@ -2962,10 +3153,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double kf_mod_err = 0.0; double kf_raw_err = 0.0; double kf_group_err = 0.0; - double recent_loop_decay[FRAMES_TO_CHECK_DECAY]; double sr_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; const double av_err = get_distribution_av_err(cpi, twopass); + const double mean_mod_score = twopass->mean_mod_score; vp9_zero(next_frame); cpi->common.frame_type = KEY_FRAME; @@ -2989,96 +3180,29 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->kf_group_bits = 0; // Total bits available to kf group twopass->kf_group_error_left = 0.0; // Group modified error score. - kf_raw_err = this_frame->intra_error; - kf_mod_err = - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); - - // Initialize the decay rates for the recent frames to check - for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0; - - // Find the next keyframe. - i = 0; - while (twopass->stats_in < twopass->stats_in_end && - rc->frames_to_key < cpi->oxcf.key_freq) { - // Accumulate kf group error. - kf_group_err += - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); - - // Load the next frame's stats. - last_frame = *this_frame; - input_stats(twopass, this_frame); + kf_raw_err = keyframe_stats->intra_error; + kf_mod_err = calc_norm_frame_score(oxcf, frame_info, keyframe_stats, + mean_mod_score, av_err); - // Provided that we are not at the end of the file... - if (cpi->oxcf.auto_key && twopass->stats_in < twopass->stats_in_end) { - double loop_decay_rate; - - // Check for a scene cut. - if (test_candidate_kf(twopass, &last_frame, this_frame, - twopass->stats_in)) - break; - - // How fast is the prediction quality decaying? 
- loop_decay_rate = get_prediction_decay_rate(cpi, twopass->stats_in); - - // We want to know something about the recent past... rather than - // as used elsewhere where we are concerned with decay in prediction - // quality since the last GF or KF. - recent_loop_decay[i % FRAMES_TO_CHECK_DECAY] = loop_decay_rate; - decay_accumulator = 1.0; - for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) - decay_accumulator *= recent_loop_decay[j]; - - // Special check for transition or high motion followed by a - // static scene. - if (detect_transition_to_still(cpi, i, cpi->oxcf.key_freq - i, - loop_decay_rate, decay_accumulator)) - break; - - // Step on to the next frame. - ++rc->frames_to_key; - - // If we don't have a real key frame within the next two - // key_freq intervals then break out of the loop. - if (rc->frames_to_key >= 2 * cpi->oxcf.key_freq) break; - } else { - ++rc->frames_to_key; - } - ++i; - } + rc->frames_to_key = vp9_get_frames_to_next_key( + oxcf, frame_info, first_pass_info, kf_show_idx, rc->min_gf_interval); // If there is a max kf interval set by the user we must obey it. // We already breakout of the loop above at 2x max. // This code centers the extra kf if the actual natural interval // is between 1x and 2x. - if (cpi->oxcf.auto_key && rc->frames_to_key > cpi->oxcf.key_freq) { - FIRSTPASS_STATS tmp_frame = first_frame; - - rc->frames_to_key /= 2; - - // Reset to the start of the group. - reset_fpf_position(twopass, start_position); - - kf_group_err = 0.0; - - // Rescan to get the correct error data for the forced kf group. 
- for (i = 0; i < rc->frames_to_key; ++i) { - kf_group_err += - calculate_norm_frame_score(cpi, twopass, oxcf, &tmp_frame, av_err); - input_stats(twopass, &tmp_frame); - } - rc->next_key_frame_forced = 1; - } else if (twopass->stats_in == twopass->stats_in_end || - rc->frames_to_key >= cpi->oxcf.key_freq) { + if (rc->frames_to_key >= cpi->oxcf.key_freq) { rc->next_key_frame_forced = 1; } else { rc->next_key_frame_forced = 0; } - // Special case for the last key frame of the file. - if (twopass->stats_in >= twopass->stats_in_end) { + for (i = 0; i < rc->frames_to_key; ++i) { + const FIRSTPASS_STATS *frame_stats = + fps_get_frame_stats(first_pass_info, kf_show_idx + i); // Accumulate kf group error. - kf_group_err += - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); + kf_group_err += calc_norm_frame_score(oxcf, frame_info, frame_stats, + mean_mod_score, av_err); } // Calculate the number of bits that should be assigned to the kf group. @@ -3103,9 +3227,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } twopass->kf_group_bits = VPXMAX(0, twopass->kf_group_bits); - // Reset the first pass file position. - reset_fpf_position(twopass, start_position); - // Scan through the kf group collating various stats used to determine // how many bits to spend on it. boost_score = 0.0; @@ -3144,8 +3265,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Monitor for static sections. // First frame in kf group the second ref indicator is invalid. 
if (i > 0) { - zero_motion_accumulator = VPXMIN( - zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); + zero_motion_accumulator = + VPXMIN(zero_motion_accumulator, + get_zero_motion_factor(&cpi->frame_info, &next_frame)); } else { zero_motion_accumulator = next_frame.pcnt_inter - next_frame.pcnt_motion; @@ -3214,6 +3336,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { gf_group->bit_allocation[0] = kf_bits; gf_group->update_type[0] = KF_UPDATE; gf_group->rf_level[0] = KF_STD; + gf_group->layer_depth[0] = 0; // Note the total error score of the kf group minus the key frame itself. twopass->kf_group_error_left = (kf_group_err - kf_mod_err); @@ -3227,11 +3350,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Default to normal-sized frame on keyframes. cpi->rc.next_frame_size_selector = UNSCALED; } -#define ARF_ACTIVE_BEST_QUALITY_ADJUSTMENT_WINDOW_SIZE 64 - // TODO(ravi.chaudhary@ittiam.com): Experiment without the below min - // condition. This might be helpful for small key frame intervals. - rc->arf_active_best_quality_adjustment_window = - VPXMIN(ARF_ACTIVE_BEST_QUALITY_ADJUSTMENT_WINDOW_SIZE, rc->frames_to_key); } static int is_skippable_frame(const VP9_COMP *cpi) { @@ -3259,6 +3377,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { TWO_PASS *const twopass = &cpi->twopass; GF_GROUP *const gf_group = &twopass->gf_group; FIRSTPASS_STATS this_frame; + const int show_idx = cm->current_video_frame; if (!twopass->stats_in) return; @@ -3341,18 +3460,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Keyframe and section processing. if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) { - FIRSTPASS_STATS this_frame_copy; - this_frame_copy = this_frame; // Define next KF group and assign bits to it. 
- find_next_key_frame(cpi, &this_frame); - this_frame = this_frame_copy; + find_next_key_frame(cpi, show_idx); } else { cm->frame_type = INTER_FRAME; } // Define a new GF/ARF group. (Should always enter here for key frames). if (rc->frames_till_gf_update_due == 0) { - define_gf_group(cpi, &this_frame); + define_gf_group(cpi, show_idx); rc->frames_till_gf_update_due = rc->baseline_gf_interval; @@ -3509,3 +3625,70 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { } } } + +#if CONFIG_RATE_CTRL +// Under CONFIG_RATE_CTRL, once the first_pass_info is ready, the number of +// coding frames (including show frame and alt ref) can be determined. +int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf, + const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, + int multi_layer_arf, int allow_alt_ref) { + int coding_frame_num = 0; + RATE_CONTROL rc; + RANGE active_gf_interval; + int arf_layers; + double gop_intra_factor; + int use_alt_ref; + int gop_coding_frames; + int gop_show_frames; + int show_idx = 0; + int arf_active_or_kf = 1; + rc.static_scene_max_gf_interval = 250; + vp9_rc_init(oxcf, 1, &rc); + + while (show_idx < first_pass_info->num_frames) { + if (rc.frames_to_key == 0) { + rc.frames_to_key = vp9_get_frames_to_next_key( + oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval); + arf_active_or_kf = 1; + } else { + } + + { + int dummy = 0; + active_gf_interval = get_active_gf_inverval_range( + frame_info, &rc, arf_active_or_kf, show_idx, dummy, dummy); + } + + arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf, + active_gf_interval.max); + if (multi_layer_arf) { + gop_intra_factor = 1.0 + 0.25 * arf_layers; + } else { + gop_intra_factor = 1.0; + } + + gop_coding_frames = get_gop_coding_frame_num( + &use_alt_ref, frame_info, first_pass_info, &rc, show_idx, + &active_gf_interval, gop_intra_factor, oxcf->lag_in_frames); + + use_alt_ref &= allow_alt_ref; + + rc.source_alt_ref_active = use_alt_ref; + 
arf_active_or_kf = use_alt_ref; + gop_show_frames = gop_coding_frames - use_alt_ref; + rc.frames_to_key -= gop_show_frames; + rc.frames_since_key += gop_show_frames; + show_idx += gop_show_frames; + coding_frame_num += gop_show_frames + use_alt_ref; + } + return coding_frame_num; +} +#endif + +FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass) { + return twopass->this_frame_stats; +} +FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *twopass) { + return twopass->total_stats; +} diff --git a/libvpx/vp9/encoder/vp9_firstpass.h b/libvpx/vp9/encoder/vp9_firstpass.h index a0a96e6ef..cfbc143c3 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.h +++ b/libvpx/vp9/encoder/vp9_firstpass.h @@ -13,6 +13,7 @@ #include <assert.h> +#include "vp9/common/vp9_onyxc_int.h" #include "vp9/encoder/vp9_lookahead.h" #include "vp9/encoder/vp9_ratectrl.h" @@ -147,6 +148,30 @@ typedef struct { } GF_GROUP; typedef struct { + const FIRSTPASS_STATS *stats; + int num_frames; +} FIRST_PASS_INFO; + +static INLINE void fps_init_first_pass_info(FIRST_PASS_INFO *first_pass_info, + const FIRSTPASS_STATS *stats, + int num_frames) { + first_pass_info->stats = stats; + first_pass_info->num_frames = num_frames; +} + +static INLINE int fps_get_num_frames(const FIRST_PASS_INFO *first_pass_info) { + return first_pass_info->num_frames; +} + +static INLINE const FIRSTPASS_STATS *fps_get_frame_stats( + const FIRST_PASS_INFO *first_pass_info, int show_idx) { + if (show_idx < 0 || show_idx >= first_pass_info->num_frames) { + return NULL; + } + return &first_pass_info->stats[show_idx]; +} + +typedef struct { unsigned int section_intra_rating; unsigned int key_frame_section_intra_rating; FIRSTPASS_STATS total_stats; @@ -154,6 +179,7 @@ typedef struct { const FIRSTPASS_STATS *stats_in; const FIRSTPASS_STATS *stats_in_start; const FIRSTPASS_STATS *stats_in_end; + FIRST_PASS_INFO first_pass_info; FIRSTPASS_STATS total_left_stats; int first_pass_done; int64_t bits_left; @@ -192,6 +218,7 @@ typedef struct { int 
extend_maxq; int extend_minq_fast; int arnr_strength_adjustment; + int last_qindex_of_arf_layer[MAX_ARF_LAYERS]; GF_GROUP gf_group; } TWO_PASS; @@ -219,6 +246,21 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi); void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width, int *scaled_frame_height); +struct VP9EncoderConfig; +int vp9_get_frames_to_next_key(const struct VP9EncoderConfig *oxcf, + const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, + int kf_show_idx, int min_gf_interval); +#if CONFIG_RATE_CTRL +int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf, + const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, + int multi_layer_arf, int allow_alt_ref); +#endif + +FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *two_pass); +FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *two_pass); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_lookahead.c b/libvpx/vp9/encoder/vp9_lookahead.c index 392cd5d41..97838c38e 100644 --- a/libvpx/vp9/encoder/vp9_lookahead.c +++ b/libvpx/vp9/encoder/vp9_lookahead.c @@ -64,6 +64,7 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width, unsigned int i; ctx->max_sz = depth; ctx->buf = calloc(depth, sizeof(*ctx->buf)); + ctx->next_show_idx = 0; if (!ctx->buf) goto bail; for (i = 0; i < depth; i++) if (vpx_alloc_frame_buffer( @@ -81,12 +82,16 @@ bail: } #define USE_PARTIAL_COPY 0 +int vp9_lookahead_full(const struct lookahead_ctx *ctx) { + return ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz; +} + +int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx) { + return ctx->next_show_idx; +} int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, - int64_t ts_start, int64_t ts_end, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif + int64_t ts_start, int64_t ts_end, int use_highbitdepth, vpx_enc_frame_flags_t flags) { struct lookahead_entry *buf; #if USE_PARTIAL_COPY @@ -101,8 +106,12 @@ int 
vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int subsampling_x = src->subsampling_x; int subsampling_y = src->subsampling_y; int larger_dimensions, new_dimensions; +#if !CONFIG_VP9_HIGHBITDEPTH + (void)use_highbitdepth; + assert(use_highbitdepth == 0); +#endif - if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1; + if (vp9_lookahead_full(ctx)) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); @@ -184,6 +193,8 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, buf->ts_start = ts_start; buf->ts_end = ts_end; buf->flags = flags; + buf->show_idx = ctx->next_show_idx; + ++ctx->next_show_idx; return 0; } diff --git a/libvpx/vp9/encoder/vp9_lookahead.h b/libvpx/vp9/encoder/vp9_lookahead.h index c627bede2..dbbe3af58 100644 --- a/libvpx/vp9/encoder/vp9_lookahead.h +++ b/libvpx/vp9/encoder/vp9_lookahead.h @@ -25,6 +25,7 @@ struct lookahead_entry { YV12_BUFFER_CONFIG img; int64_t ts_start; int64_t ts_end; + int show_idx; /*The show_idx of this frame*/ vpx_enc_frame_flags_t flags; }; @@ -32,10 +33,12 @@ struct lookahead_entry { #define MAX_PRE_FRAMES 1 struct lookahead_ctx { - int max_sz; /* Absolute size of the queue */ - int sz; /* Number of buffers currently in the queue */ - int read_idx; /* Read index */ - int write_idx; /* Write index */ + int max_sz; /* Absolute size of the queue */ + int sz; /* Number of buffers currently in the queue */ + int read_idx; /* Read index */ + int write_idx; /* Write index */ + int next_show_idx; /* The show_idx that will be assigned to the next frame + being pushed in the queue*/ struct lookahead_entry *buf; /* Buffer list */ }; @@ -57,6 +60,23 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width, */ void vp9_lookahead_destroy(struct lookahead_ctx *ctx); +/**\brief Check if lookahead is full + * + * \param[in] ctx Pointer to the lookahead context + * + * Return 1 if lookahead is full, otherwise return 0. 
+ */ +int vp9_lookahead_full(const struct lookahead_ctx *ctx); + +/**\brief Return the next_show_idx + * + * \param[in] ctx Pointer to the lookahead context + * + * Return the show_idx that will be assigned to the next + * frame pushed by vp9_lookahead_push() + */ +int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx); + /**\brief Enqueue a source buffer * * This function will copy the source image into a new framebuffer with @@ -73,10 +93,7 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx); * \param[in] active_map Map that specifies which macroblock is active */ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, - int64_t ts_start, int64_t ts_end, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif + int64_t ts_start, int64_t ts_end, int use_highbitdepth, vpx_enc_frame_flags_t flags); /**\brief Get the next source buffer to encode diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c index d1688f993..ac29f36ec 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.c +++ b/libvpx/vp9/encoder/vp9_mcomp.c @@ -1731,239 +1731,99 @@ static int exhaustive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, #define MAX_RANGE 256 #define MIN_INTERVAL 1 #if CONFIG_NON_GREEDY_MV - -#define LOG2_TABLE_SIZE 1024 -static const int log2_table[LOG2_TABLE_SIZE] = { - 0, // This is a dummy value - 0, 1048576, 1661954, 2097152, 2434718, 2710530, 2943725, - 3145728, 3323907, 3483294, 3627477, 3759106, 3880192, 3992301, - 4096672, 4194304, 4286015, 4372483, 4454275, 4531870, 4605679, - 4676053, 4743299, 4807682, 4869436, 4928768, 4985861, 5040877, - 5093962, 5145248, 5194851, 5242880, 5289431, 5334591, 5378443, - 5421059, 5462508, 5502851, 5542146, 5580446, 5617800, 5654255, - 5689851, 5724629, 5758625, 5791875, 5824409, 5856258, 5887450, - 5918012, 5947969, 5977344, 6006160, 6034437, 6062195, 6089453, - 6116228, 6142538, 6168398, 6193824, 6218829, 6243427, 6267632, - 6291456, 6314910, 6338007, 6360756, 6383167, 
6405252, 6427019, - 6448477, 6469635, 6490501, 6511084, 6531390, 6551427, 6571202, - 6590722, 6609993, 6629022, 6647815, 6666376, 6684713, 6702831, - 6720734, 6738427, 6755916, 6773205, 6790299, 6807201, 6823917, - 6840451, 6856805, 6872985, 6888993, 6904834, 6920510, 6936026, - 6951384, 6966588, 6981641, 6996545, 7011304, 7025920, 7040397, - 7054736, 7068940, 7083013, 7096956, 7110771, 7124461, 7138029, - 7151476, 7164804, 7178017, 7191114, 7204100, 7216974, 7229740, - 7242400, 7254954, 7267405, 7279754, 7292003, 7304154, 7316208, - 7328167, 7340032, 7351805, 7363486, 7375079, 7386583, 7398000, - 7409332, 7420579, 7431743, 7442826, 7453828, 7464751, 7475595, - 7486362, 7497053, 7507669, 7518211, 7528680, 7539077, 7549404, - 7559660, 7569847, 7579966, 7590017, 7600003, 7609923, 7619778, - 7629569, 7639298, 7648964, 7658569, 7668114, 7677598, 7687023, - 7696391, 7705700, 7714952, 7724149, 7733289, 7742375, 7751407, - 7760385, 7769310, 7778182, 7787003, 7795773, 7804492, 7813161, - 7821781, 7830352, 7838875, 7847350, 7855777, 7864158, 7872493, - 7880782, 7889027, 7897226, 7905381, 7913492, 7921561, 7929586, - 7937569, 7945510, 7953410, 7961268, 7969086, 7976864, 7984602, - 7992301, 7999960, 8007581, 8015164, 8022709, 8030217, 8037687, - 8045121, 8052519, 8059880, 8067206, 8074496, 8081752, 8088973, - 8096159, 8103312, 8110431, 8117516, 8124569, 8131589, 8138576, - 8145532, 8152455, 8159347, 8166208, 8173037, 8179836, 8186605, - 8193343, 8200052, 8206731, 8213380, 8220001, 8226593, 8233156, - 8239690, 8246197, 8252676, 8259127, 8265550, 8271947, 8278316, - 8284659, 8290976, 8297266, 8303530, 8309768, 8315981, 8322168, - 8328330, 8334467, 8340579, 8346667, 8352730, 8358769, 8364784, - 8370775, 8376743, 8382687, 8388608, 8394506, 8400381, 8406233, - 8412062, 8417870, 8423655, 8429418, 8435159, 8440878, 8446576, - 8452252, 8457908, 8463542, 8469155, 8474748, 8480319, 8485871, - 8491402, 8496913, 8502404, 8507875, 8513327, 8518759, 8524171, - 8529564, 8534938, 8540293, 
8545629, 8550947, 8556245, 8561525, - 8566787, 8572031, 8577256, 8582464, 8587653, 8592825, 8597980, - 8603116, 8608236, 8613338, 8618423, 8623491, 8628542, 8633576, - 8638593, 8643594, 8648579, 8653547, 8658499, 8663434, 8668354, - 8673258, 8678145, 8683017, 8687874, 8692715, 8697540, 8702350, - 8707145, 8711925, 8716690, 8721439, 8726174, 8730894, 8735599, - 8740290, 8744967, 8749628, 8754276, 8758909, 8763528, 8768134, - 8772725, 8777302, 8781865, 8786415, 8790951, 8795474, 8799983, - 8804478, 8808961, 8813430, 8817886, 8822328, 8826758, 8831175, - 8835579, 8839970, 8844349, 8848715, 8853068, 8857409, 8861737, - 8866053, 8870357, 8874649, 8878928, 8883195, 8887451, 8891694, - 8895926, 8900145, 8904353, 8908550, 8912734, 8916908, 8921069, - 8925220, 8929358, 8933486, 8937603, 8941708, 8945802, 8949885, - 8953957, 8958018, 8962068, 8966108, 8970137, 8974155, 8978162, - 8982159, 8986145, 8990121, 8994086, 8998041, 9001986, 9005920, - 9009844, 9013758, 9017662, 9021556, 9025440, 9029314, 9033178, - 9037032, 9040877, 9044711, 9048536, 9052352, 9056157, 9059953, - 9063740, 9067517, 9071285, 9075044, 9078793, 9082533, 9086263, - 9089985, 9093697, 9097400, 9101095, 9104780, 9108456, 9112123, - 9115782, 9119431, 9123072, 9126704, 9130328, 9133943, 9137549, - 9141146, 9144735, 9148316, 9151888, 9155452, 9159007, 9162554, - 9166092, 9169623, 9173145, 9176659, 9180165, 9183663, 9187152, - 9190634, 9194108, 9197573, 9201031, 9204481, 9207923, 9211357, - 9214784, 9218202, 9221613, 9225017, 9228412, 9231800, 9235181, - 9238554, 9241919, 9245277, 9248628, 9251971, 9255307, 9258635, - 9261956, 9265270, 9268577, 9271876, 9275169, 9278454, 9281732, - 9285002, 9288266, 9291523, 9294773, 9298016, 9301252, 9304481, - 9307703, 9310918, 9314126, 9317328, 9320523, 9323711, 9326892, - 9330067, 9333235, 9336397, 9339552, 9342700, 9345842, 9348977, - 9352106, 9355228, 9358344, 9361454, 9364557, 9367654, 9370744, - 9373828, 9376906, 9379978, 9383043, 9386102, 9389155, 9392202, - 9395243, 
9398278, 9401306, 9404329, 9407345, 9410356, 9413360, - 9416359, 9419351, 9422338, 9425319, 9428294, 9431263, 9434226, - 9437184, 9440136, 9443082, 9446022, 9448957, 9451886, 9454809, - 9457726, 9460638, 9463545, 9466446, 9469341, 9472231, 9475115, - 9477994, 9480867, 9483735, 9486597, 9489454, 9492306, 9495152, - 9497993, 9500828, 9503659, 9506484, 9509303, 9512118, 9514927, - 9517731, 9520530, 9523324, 9526112, 9528895, 9531674, 9534447, - 9537215, 9539978, 9542736, 9545489, 9548237, 9550980, 9553718, - 9556451, 9559179, 9561903, 9564621, 9567335, 9570043, 9572747, - 9575446, 9578140, 9580830, 9583514, 9586194, 9588869, 9591540, - 9594205, 9596866, 9599523, 9602174, 9604821, 9607464, 9610101, - 9612735, 9615363, 9617987, 9620607, 9623222, 9625832, 9628438, - 9631040, 9633637, 9636229, 9638818, 9641401, 9643981, 9646556, - 9649126, 9651692, 9654254, 9656812, 9659365, 9661914, 9664459, - 9666999, 9669535, 9672067, 9674594, 9677118, 9679637, 9682152, - 9684663, 9687169, 9689672, 9692170, 9694665, 9697155, 9699641, - 9702123, 9704601, 9707075, 9709545, 9712010, 9714472, 9716930, - 9719384, 9721834, 9724279, 9726721, 9729159, 9731593, 9734024, - 9736450, 9738872, 9741291, 9743705, 9746116, 9748523, 9750926, - 9753326, 9755721, 9758113, 9760501, 9762885, 9765266, 9767642, - 9770015, 9772385, 9774750, 9777112, 9779470, 9781825, 9784175, - 9786523, 9788866, 9791206, 9793543, 9795875, 9798204, 9800530, - 9802852, 9805170, 9807485, 9809797, 9812104, 9814409, 9816710, - 9819007, 9821301, 9823591, 9825878, 9828161, 9830441, 9832718, - 9834991, 9837261, 9839527, 9841790, 9844050, 9846306, 9848559, - 9850808, 9853054, 9855297, 9857537, 9859773, 9862006, 9864235, - 9866462, 9868685, 9870904, 9873121, 9875334, 9877544, 9879751, - 9881955, 9884155, 9886352, 9888546, 9890737, 9892925, 9895109, - 9897291, 9899469, 9901644, 9903816, 9905985, 9908150, 9910313, - 9912473, 9914629, 9916783, 9918933, 9921080, 9923225, 9925366, - 9927504, 9929639, 9931771, 9933900, 9936027, 9938150, 
9940270, - 9942387, 9944502, 9946613, 9948721, 9950827, 9952929, 9955029, - 9957126, 9959219, 9961310, 9963398, 9965484, 9967566, 9969645, - 9971722, 9973796, 9975866, 9977934, 9980000, 9982062, 9984122, - 9986179, 9988233, 9990284, 9992332, 9994378, 9996421, 9998461, - 10000498, 10002533, 10004565, 10006594, 10008621, 10010644, 10012665, - 10014684, 10016700, 10018713, 10020723, 10022731, 10024736, 10026738, - 10028738, 10030735, 10032729, 10034721, 10036710, 10038697, 10040681, - 10042662, 10044641, 10046617, 10048591, 10050562, 10052530, 10054496, - 10056459, 10058420, 10060379, 10062334, 10064287, 10066238, 10068186, - 10070132, 10072075, 10074016, 10075954, 10077890, 10079823, 10081754, - 10083682, 10085608, 10087532, 10089453, 10091371, 10093287, 10095201, - 10097112, 10099021, 10100928, 10102832, 10104733, 10106633, 10108529, - 10110424, 10112316, 10114206, 10116093, 10117978, 10119861, 10121742, - 10123620, 10125495, 10127369, 10129240, 10131109, 10132975, 10134839, - 10136701, 10138561, 10140418, 10142273, 10144126, 10145976, 10147825, - 10149671, 10151514, 10153356, 10155195, 10157032, 10158867, 10160699, - 10162530, 10164358, 10166184, 10168007, 10169829, 10171648, 10173465, - 10175280, 10177093, 10178904, 10180712, 10182519, 10184323, 10186125, - 10187925, 10189722, 10191518, 10193311, 10195103, 10196892, 10198679, - 10200464, 10202247, 10204028, 10205806, 10207583, 10209357, 10211130, - 10212900, 10214668, 10216435, 10218199, 10219961, 10221721, 10223479, - 10225235, 10226989, 10228741, 10230491, 10232239, 10233985, 10235728, - 10237470, 10239210, 10240948, 10242684, 10244417, 10246149, 10247879, - 10249607, 10251333, 10253057, 10254779, 10256499, 10258217, 10259933, - 10261647, 10263360, 10265070, 10266778, 10268485, 10270189, 10271892, - 10273593, 10275292, 10276988, 10278683, 10280376, 10282068, 10283757, - 10285444, 10287130, 10288814, 10290495, 10292175, 10293853, 10295530, - 10297204, 10298876, 10300547, 10302216, 10303883, 10305548, 10307211, - 
10308873, 10310532, 10312190, 10313846, 10315501, 10317153, 10318804, - 10320452, 10322099, 10323745, 10325388, 10327030, 10328670, 10330308, - 10331944, 10333578, 10335211, 10336842, 10338472, 10340099, 10341725, - 10343349, 10344971, 10346592, 10348210, 10349828, 10351443, 10353057, - 10354668, 10356279, 10357887, 10359494, 10361099, 10362702, 10364304, - 10365904, 10367502, 10369099, 10370694, 10372287, 10373879, 10375468, - 10377057, 10378643, 10380228, 10381811, 10383393, 10384973, 10386551, - 10388128, 10389703, 10391276, 10392848, 10394418, 10395986, 10397553, - 10399118, 10400682, 10402244, 10403804, 10405363, 10406920, 10408476, - 10410030, 10411582, 10413133, 10414682, 10416230, 10417776, 10419320, - 10420863, 10422404, 10423944, 10425482, 10427019, 10428554, 10430087, - 10431619, 10433149, 10434678, 10436206, 10437731, 10439256, 10440778, - 10442299, 10443819, 10445337, 10446854, 10448369, 10449882, 10451394, - 10452905, 10454414, 10455921, 10457427, 10458932, 10460435, 10461936, - 10463436, 10464935, 10466432, 10467927, 10469422, 10470914, 10472405, - 10473895, 10475383, 10476870, 10478355, 10479839, 10481322, 10482802, - 10484282, -}; - -#define LOG2_PRECISION 20 -static int64_t log2_approximation(int64_t v) { - assert(v > 0); - if (v < LOG2_TABLE_SIZE) { - return log2_table[v]; - } else { - // use linear approximation when v >= 2^10 - const int slope = - 1477; // slope = 1 / (log(2) * 1024) * (1 << LOG2_PRECISION) - assert(LOG2_TABLE_SIZE == 1 << 10); - - return slope * (v - LOG2_TABLE_SIZE) + (10 << LOG2_PRECISION); - } -} - -int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, - int mv_num) { - int i; - int update = 0; - int64_t best_cost = 0; - vpx_clear_system_state(); - for (i = 0; i < mv_num; ++i) { - if (nb_mvs[i].as_int != INVALID_MV) { - MV nb_mv = nb_mvs[i].as_mv; - const int64_t row_diff = abs(mv->row - nb_mv.row); - const int64_t col_diff = abs(mv->col - nb_mv.col); - const int64_t cost = - log2_approximation(1 + row_diff * 
row_diff + col_diff * col_diff); - if (update == 0) { - best_cost = cost; - update = 1; - } else { - best_cost = cost < best_cost ? cost : best_cost; +static int64_t exhaustive_mesh_search_multi_step( + MV *best_mv, const MV *center_mv, int range, int step, + const struct buf_2d *src, const struct buf_2d *pre, int lambda, + const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits, + const vp9_variance_fn_ptr_t *fn_ptr) { + int64_t best_sad; + int r, c; + int start_col, end_col, start_row, end_row; + *best_mv = *center_mv; + best_sad = + ((int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, center_mv), pre->stride) + << LOG2_PRECISION) + + lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num); + start_row = VPXMAX(center_mv->row - range, mv_limits->row_min); + start_col = VPXMAX(center_mv->col - range, mv_limits->col_min); + end_row = VPXMIN(center_mv->row + range, mv_limits->row_max); + end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); + for (r = start_row; r <= end_row; r += step) { + for (c = start_col; c <= end_col; c += step) { + const MV mv = { r, c }; + int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, &mv), pre->stride) + << LOG2_PRECISION; + if (sad < best_sad) { + sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } } } } - return best_cost; + return best_sad; } -static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv, - int range, int step, - const vp9_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, int lambda, - const int_mv *nb_full_mvs, - int full_mv_num) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - MV fcenter_mv = { center_mv->row, center_mv->col }; +static int64_t exhaustive_mesh_search_single_step( + MV *best_mv, const MV *center_mv, int range, 
const struct buf_2d *src, + const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs, + int full_mv_num, const MvLimits *mv_limits, + const vp9_variance_fn_ptr_t *fn_ptr) { int64_t best_sad; int r, c, i; int start_col, end_col, start_row, end_row; - int col_step = (step > 1) ? step : 4; - assert(step >= 1); - - clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max, - x->mv_limits.row_min, x->mv_limits.row_max); - *best_mv = fcenter_mv; + *best_mv = *center_mv; best_sad = - ((int64_t)fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &fcenter_mv), - in_what->stride) + ((int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, center_mv), pre->stride) << LOG2_PRECISION) + - lambda * vp9_nb_mvs_inconsistency(&fcenter_mv, nb_full_mvs, full_mv_num); - start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row); - start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col); - end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row); - end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col); + lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num); + start_row = VPXMAX(center_mv->row - range, mv_limits->row_min); + start_col = VPXMAX(center_mv->col - range, mv_limits->col_min); + end_row = VPXMIN(center_mv->row + range, mv_limits->row_max); + end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); + for (r = start_row; r <= end_row; r += 1) { + c = start_col; + // sdx8f may not be available some block size + if (fn_ptr->sdx8f) { + while (c + 7 <= end_col) { + unsigned int sads[8]; + const MV mv = { r, c }; + const uint8_t *buf = get_buf_from_mv(pre, &mv); + fn_ptr->sdx8f(src->buf, src->stride, buf, pre->stride, sads); + + for (i = 0; i < 8; ++i) { + int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; + if (sad < best_sad) { + const MV mv = { r, c + i }; + sad += lambda * + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } 
+ } + } + c += 8; + } + } + while (c + 3 <= end_col) { + unsigned int sads[4]; + const uint8_t *addrs[4]; + for (i = 0; i < 4; ++i) { + const MV mv = { r, c + i }; + addrs[i] = get_buf_from_mv(pre, &mv); + } + fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads); - for (r = start_row; r <= end_row; r += step) { - for (c = start_col; c <= end_col; c += col_step) { - // Step > 1 means we are not checking every location in this pass. - if (step > 1) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c }; - int64_t sad = - (int64_t)fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride) - << LOG2_PRECISION; + for (i = 0; i < 4; ++i) { + int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; if (sad < best_sad) { + const MV mv = { r, c + i }; sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); if (sad < best_sad) { @@ -1971,53 +1831,48 @@ static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv, *best_mv = mv; } } - } else { - // 4 sads in a single call if we are checking every location - if (c + 3 <= end_col) { - unsigned int sads[4]; - const uint8_t *addrs[4]; - for (i = 0; i < 4; ++i) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; - addrs[i] = get_buf_from_mv(in_what, &mv); - } - fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); - - for (i = 0; i < 4; ++i) { - int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; - if (sad < best_sad) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; - sad += lambda * - vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } - } - } - } else { - for (i = 0; i < end_col - c; ++i) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; - int64_t sad = (int64_t)fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), - in_what->stride) - << LOG2_PRECISION; - if (sad < best_sad) { - sad += lambda * - 
vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } - } - } + } + c += 4; + } + while (c <= end_col) { + const MV mv = { r, c }; + int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, &mv), pre->stride) + << LOG2_PRECISION; + if (sad < best_sad) { + sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; } } + c += 1; } } - return best_sad; } +static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv, + int range, int step, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *center_mv, int lambda, + const int_mv *nb_full_mvs, + int full_mv_num) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *src = &x->plane[0].src; + const struct buf_2d *pre = &xd->plane[0].pre[0]; + assert(step >= 1); + assert(is_mv_in(&x->mv_limits, center_mv)); + if (step == 1) { + return exhaustive_mesh_search_single_step( + best_mv, center_mv, range, src, pre, lambda, nb_full_mvs, full_mv_num, + &x->mv_limits, fn_ptr); + } + return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step, src, + pre, lambda, nb_full_mvs, + full_mv_num, &x->mv_limits, fn_ptr); +} + static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x, MV *centre_mv_full, const vp9_variance_fn_ptr_t *fn_ptr, @@ -2031,7 +1886,6 @@ static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x, int interval = sf->mesh_patterns[0].interval; int range = sf->mesh_patterns[0].range; int baseline_interval_divisor; - const MV dummy_mv = { 0, 0 }; // Trap illegal values for interval and range for this function. 
if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || @@ -2067,19 +1921,18 @@ static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x, } } - bestsme = vp9_get_mvpred_var(x, &temp_mv, &dummy_mv, fn_ptr, 0); *dst_mv = temp_mv; return bestsme; } -static double diamond_search_sad_new(const MACROBLOCK *x, - const search_site_config *cfg, - const MV *init_full_mv, MV *best_full_mv, - int search_param, int lambda, int *num00, - const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs, - int full_mv_num) { +static int64_t diamond_search_sad_new(const MACROBLOCK *x, + const search_site_config *cfg, + const MV *init_full_mv, MV *best_full_mv, + int search_param, int lambda, int *num00, + const vp9_variance_fn_ptr_t *fn_ptr, + const int_mv *nb_full_mvs, + int full_mv_num) { int i, j, step; const MACROBLOCKD *const xd = &x->e_mbd; @@ -2089,7 +1942,7 @@ static double diamond_search_sad_new(const MACROBLOCK *x, const int in_what_stride = xd->plane[0].pre[0].stride; const uint8_t *best_address; - double bestsad; + int64_t bestsad; int best_site = -1; int last_site = -1; @@ -2116,11 +1969,11 @@ static double diamond_search_sad_new(const MACROBLOCK *x, // Check the starting position { - const double mv_dist = - fn_ptr->sdf(what, what_stride, in_what, in_what_stride); - const double mv_cost = - vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num) / - (double)(1 << LOG2_PRECISION); + const int64_t mv_dist = + (int64_t)fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + << LOG2_PRECISION; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); bestsad = mv_dist + lambda * mv_cost; } @@ -2151,14 +2004,13 @@ static double diamond_search_sad_new(const MACROBLOCK *x, sad_array); for (t = 0; t < 4; t++, i++) { - if (sad_array[t] < bestsad) { + const int64_t mv_dist = (int64_t)sad_array[t] << LOG2_PRECISION; + if (mv_dist < bestsad) { const MV this_mv = { best_full_mv->row + 
ss_mv[i].row, best_full_mv->col + ss_mv[i].col }; - const double mv_dist = sad_array[t]; - const double mv_cost = - vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num) / - (double)(1 << LOG2_PRECISION); - double thissad = mv_dist + lambda * mv_cost; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); + const int64_t thissad = mv_dist + lambda * mv_cost; if (thissad < bestsad) { bestsad = thissad; best_site = i; @@ -2174,13 +2026,14 @@ static double diamond_search_sad_new(const MACROBLOCK *x, if (is_mv_in(&x->mv_limits, &this_mv)) { const uint8_t *const check_here = ss_os[i] + best_address; - const double mv_dist = - fn_ptr->sdf(what, what_stride, check_here, in_what_stride); + const int64_t mv_dist = + (int64_t)fn_ptr->sdf(what, what_stride, check_here, + in_what_stride) + << LOG2_PRECISION; if (mv_dist < bestsad) { - const double mv_cost = - vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num) / - (double)(1 << LOG2_PRECISION); - double thissad = mv_dist + lambda * mv_cost; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); + const int64_t thissad = mv_dist + lambda * mv_cost; if (thissad < bestsad) { bestsad = thissad; best_site = i; @@ -2202,32 +2055,30 @@ static double diamond_search_sad_new(const MACROBLOCK *x, return bestsad; } -void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row, - int mi_col, int rf_idx, BLOCK_SIZE bsize, - int_mv *nb_full_mvs) { - const int mi_width = num_8x8_blocks_wide_lookup[bsize]; - const int mi_height = num_8x8_blocks_high_lookup[bsize]; +int vp9_prepare_nb_full_mvs(const MotionField *motion_field, int mi_row, + int mi_col, int_mv *nb_full_mvs) { + const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; + const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; + int nb_full_mv_num = 0; int i; + assert(mi_row % 
mi_height == 0); + assert(mi_col % mi_width == 0); for (i = 0; i < NB_MVS_NUM; ++i) { - int r = dirs[i][0] * mi_height; - int c = dirs[i][1] * mi_width; - if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 && - mi_col + c < tpl_frame->mi_cols) { - const TplDepStats *tpl_ptr = - &tpl_frame - ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c]; - int_mv *mv = - get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c); - if (tpl_ptr->ready[rf_idx]) { - nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv); - } else { - nb_full_mvs[i].as_int = INVALID_MV; + int r = dirs[i][0]; + int c = dirs[i][1]; + int brow = mi_row / mi_height + r; + int bcol = mi_col / mi_width + c; + if (brow >= 0 && brow < motion_field->block_rows && bcol >= 0 && + bcol < motion_field->block_cols) { + if (vp9_motion_field_is_mv_set(motion_field, brow, bcol)) { + int_mv mv = vp9_motion_field_get_mv(motion_field, brow, bcol); + nb_full_mvs[nb_full_mv_num].as_mv = get_full_mv(&mv.as_mv); + ++nb_full_mv_num; } - } else { - nb_full_mvs[i].as_int = INVALID_MV; } } + return nb_full_mv_num; } #endif // CONFIG_NON_GREEDY_MV @@ -2585,26 +2436,32 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, return best_sad; } +static int get_exhaustive_threshold(int exhaustive_searches_thresh, + BLOCK_SIZE bsize) { + return exhaustive_searches_thresh >> + (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize])); +} + #if CONFIG_NON_GREEDY_MV // Runs sequence of diamond searches in smaller steps for RD. 
/* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ -double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, - MV *mvp_full, int step_param, int lambda, - int do_refine, - const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs, int full_mv_num, - MV *best_mv) { +int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, MV *mvp_full, int step_param, + int lambda, int do_refine, + const int_mv *nb_full_mvs, int full_mv_num, + MV *best_mv) { + const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; + const SPEED_FEATURES *const sf = &cpi->sf; int n, num00 = 0; - double thissme; - double bestsme; + int thissme; + int bestsme; const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param; const MV center_mv = { 0, 0 }; vpx_clear_system_state(); - bestsme = - diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, - lambda, &n, fn_ptr, nb_full_mvs, full_mv_num); + diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, lambda, + &n, fn_ptr, nb_full_mvs, full_mv_num); bestsme = vp9_get_mvpred_var(x, best_mv, ¢er_mv, fn_ptr, 0); @@ -2618,9 +2475,9 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, num00--; } else { MV temp_mv; - thissme = diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv, - step_param + n, lambda, &num00, fn_ptr, - nb_full_mvs, full_mv_num); + diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv, + step_param + n, lambda, &num00, fn_ptr, + nb_full_mvs, full_mv_num); thissme = vp9_get_mvpred_var(x, &temp_mv, ¢er_mv, fn_ptr, 0); // check to see if refining search is needed. 
if (num00 > further_steps - n) do_refine = 0; @@ -2636,8 +2493,8 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, if (do_refine) { const int search_range = 8; MV temp_mv = *best_mv; - thissme = vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range, - fn_ptr, nb_full_mvs, full_mv_num); + vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range, fn_ptr, + nb_full_mvs, full_mv_num); thissme = vp9_get_mvpred_var(x, &temp_mv, ¢er_mv, fn_ptr, 0); if (thissme < bestsme) { bestsme = thissme; @@ -2645,8 +2502,16 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, } } - bestsme = (double)full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, - lambda, nb_full_mvs, full_mv_num); + if (sf->exhaustive_searches_thresh < INT_MAX && + !cpi->rc.is_src_frame_alt_ref) { + const int64_t exhaustive_thr = + get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize); + if (bestsme > exhaustive_thr) { + full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda, + nb_full_mvs, full_mv_num); + bestsme = vp9_get_mvpred_var(x, best_mv, ¢er_mv, fn_ptr, 0); + } + } return bestsme; } #endif // CONFIG_NON_GREEDY_MV @@ -2774,24 +2639,25 @@ static int full_pixel_exhaustive(const VP9_COMP *const cpi, } #if CONFIG_NON_GREEDY_MV -double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, - int lambda, int search_range, - const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs, int full_mv_num) { +int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, + int lambda, int search_range, + const vp9_variance_fn_ptr_t *fn_ptr, + const int_mv *nb_full_mvs, + int full_mv_num) { const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv); - double best_sad; + 
int64_t best_sad; int i, j; vpx_clear_system_state(); { - const double mv_dist = - fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride); - const double mv_cost = - vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num) / - (double)(1 << LOG2_PRECISION); + const int64_t mv_dist = (int64_t)fn_ptr->sdf(what->buf, what->stride, + best_address, in_what->stride) + << LOG2_PRECISION; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); best_sad = mv_dist + lambda * mv_cost; } @@ -2813,11 +2679,10 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, for (j = 0; j < 4; ++j) { const MV mv = { best_full_mv->row + neighbors[j].row, best_full_mv->col + neighbors[j].col }; - const double mv_dist = sads[j]; - const double mv_cost = - vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num) / - (double)(1 << LOG2_PRECISION); - const double thissad = mv_dist + lambda * mv_cost; + const int64_t mv_dist = (int64_t)sads[j] << LOG2_PRECISION; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + const int64_t thissad = mv_dist + lambda * mv_cost; if (thissad < best_sad) { best_sad = thissad; best_site = j; @@ -2829,13 +2694,14 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, best_full_mv->col + neighbors[j].col }; if (is_mv_in(&x->mv_limits, &mv)) { - const double mv_dist = - fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride); - const double mv_cost = - vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num) / - (double)(1 << LOG2_PRECISION); - const double thissad = mv_dist + lambda * mv_cost; + const int64_t mv_dist = + (int64_t)fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), + in_what->stride) + << LOG2_PRECISION; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + const int64_t thissad = mv_dist + lambda * mv_cost; if (thissad < best_sad) { 
best_sad = thissad; best_site = j; @@ -3034,9 +2900,10 @@ int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x, if (sf->exhaustive_searches_thresh < INT_MAX && !cpi->rc.is_src_frame_alt_ref) { const int64_t exhaustive_thr = - sf->exhaustive_searches_thresh >> - (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize])); - if (var > exhaustive_thr) run_exhaustive_search = 1; + get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize); + if (var > exhaustive_thr) { + run_exhaustive_search = 1; + } } } else if (method == MESH) { run_exhaustive_search = 1; diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h index cafa2d150..0c4d8f23c 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.h +++ b/libvpx/vp9/encoder/vp9_mcomp.h @@ -12,6 +12,9 @@ #define VPX_VP9_ENCODER_VP9_MCOMP_H_ #include "vp9/encoder/vp9_block.h" +#if CONFIG_NON_GREEDY_MV +#include "vp9/encoder/vp9_non_greedy_mv.h" +#endif // CONFIG_NON_GREEDY_MV #include "vpx_dsp/variance.h" #ifdef __cplusplus @@ -126,22 +129,18 @@ void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, const MV *ref_mv); #if CONFIG_NON_GREEDY_MV -#define NB_MVS_NUM 4 struct TplDepStats; -double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, - int lambda, int search_range, - const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs, int full_mv_num); - -double vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x, - MV *mvp_full, int step_param, int lambda, - int do_refine, - const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs, int full_mv_num, - MV *best_mv); - -int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, - int mv_num); +int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, + int lambda, int search_range, + const vp9_variance_fn_ptr_t *fn_ptr, + const int_mv *nb_full_mvs, int full_mv_num); + +int vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x, + 
BLOCK_SIZE bsize, MV *mvp_full, int step_param, + int lambda, int do_refine, + const int_mv *nb_full_mvs, int full_mv_num, + MV *best_mv); + static INLINE MV get_full_mv(const MV *mv) { MV out_mv; out_mv.row = mv->row >> 3; @@ -149,9 +148,8 @@ static INLINE MV get_full_mv(const MV *mv) { return out_mv; } struct TplDepFrame; -void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row, - int mi_col, int rf_idx, BLOCK_SIZE bsize, - int_mv *nb_full_mvs); +int vp9_prepare_nb_full_mvs(const struct MotionField *motion_field, int mi_row, + int mi_col, int_mv *nb_full_mvs); static INLINE BLOCK_SIZE get_square_block_size(BLOCK_SIZE bsize) { BLOCK_SIZE square_bsize; diff --git a/libvpx/vp9/encoder/vp9_non_greedy_mv.c b/libvpx/vp9/encoder/vp9_non_greedy_mv.c new file mode 100644 index 000000000..4679d6c49 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_non_greedy_mv.c @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/common/vp9_mv.h" +#include "vp9/encoder/vp9_non_greedy_mv.h" +// TODO(angiebird): move non_greedy_mv related functions to this file + +#define LOG2_TABLE_SIZE 1024 +static const int log2_table[LOG2_TABLE_SIZE] = { + 0, // This is a dummy value + 0, 1048576, 1661954, 2097152, 2434718, 2710530, 2943725, + 3145728, 3323907, 3483294, 3627477, 3759106, 3880192, 3992301, + 4096672, 4194304, 4286015, 4372483, 4454275, 4531870, 4605679, + 4676053, 4743299, 4807682, 4869436, 4928768, 4985861, 5040877, + 5093962, 5145248, 5194851, 5242880, 5289431, 5334591, 5378443, + 5421059, 5462508, 5502851, 5542146, 5580446, 5617800, 5654255, + 5689851, 5724629, 5758625, 5791875, 5824409, 5856258, 5887450, + 5918012, 5947969, 5977344, 6006160, 6034437, 6062195, 6089453, + 6116228, 6142538, 6168398, 6193824, 6218829, 6243427, 6267632, + 6291456, 6314910, 6338007, 6360756, 6383167, 6405252, 6427019, + 6448477, 6469635, 6490501, 6511084, 6531390, 6551427, 6571202, + 6590722, 6609993, 6629022, 6647815, 6666376, 6684713, 6702831, + 6720734, 6738427, 6755916, 6773205, 6790299, 6807201, 6823917, + 6840451, 6856805, 6872985, 6888993, 6904834, 6920510, 6936026, + 6951384, 6966588, 6981641, 6996545, 7011304, 7025920, 7040397, + 7054736, 7068940, 7083013, 7096956, 7110771, 7124461, 7138029, + 7151476, 7164804, 7178017, 7191114, 7204100, 7216974, 7229740, + 7242400, 7254954, 7267405, 7279754, 7292003, 7304154, 7316208, + 7328167, 7340032, 7351805, 7363486, 7375079, 7386583, 7398000, + 7409332, 7420579, 7431743, 7442826, 7453828, 7464751, 7475595, + 7486362, 7497053, 7507669, 7518211, 7528680, 7539077, 7549404, + 7559660, 7569847, 7579966, 7590017, 7600003, 7609923, 7619778, + 7629569, 7639298, 7648964, 7658569, 7668114, 7677598, 7687023, + 7696391, 7705700, 7714952, 7724149, 7733289, 7742375, 7751407, + 7760385, 7769310, 7778182, 7787003, 7795773, 7804492, 7813161, + 7821781, 7830352, 7838875, 7847350, 7855777, 7864158, 7872493, + 7880782, 7889027, 7897226, 7905381, 7913492, 
7921561, 7929586, + 7937569, 7945510, 7953410, 7961268, 7969086, 7976864, 7984602, + 7992301, 7999960, 8007581, 8015164, 8022709, 8030217, 8037687, + 8045121, 8052519, 8059880, 8067206, 8074496, 8081752, 8088973, + 8096159, 8103312, 8110431, 8117516, 8124569, 8131589, 8138576, + 8145532, 8152455, 8159347, 8166208, 8173037, 8179836, 8186605, + 8193343, 8200052, 8206731, 8213380, 8220001, 8226593, 8233156, + 8239690, 8246197, 8252676, 8259127, 8265550, 8271947, 8278316, + 8284659, 8290976, 8297266, 8303530, 8309768, 8315981, 8322168, + 8328330, 8334467, 8340579, 8346667, 8352730, 8358769, 8364784, + 8370775, 8376743, 8382687, 8388608, 8394506, 8400381, 8406233, + 8412062, 8417870, 8423655, 8429418, 8435159, 8440878, 8446576, + 8452252, 8457908, 8463542, 8469155, 8474748, 8480319, 8485871, + 8491402, 8496913, 8502404, 8507875, 8513327, 8518759, 8524171, + 8529564, 8534938, 8540293, 8545629, 8550947, 8556245, 8561525, + 8566787, 8572031, 8577256, 8582464, 8587653, 8592825, 8597980, + 8603116, 8608236, 8613338, 8618423, 8623491, 8628542, 8633576, + 8638593, 8643594, 8648579, 8653547, 8658499, 8663434, 8668354, + 8673258, 8678145, 8683017, 8687874, 8692715, 8697540, 8702350, + 8707145, 8711925, 8716690, 8721439, 8726174, 8730894, 8735599, + 8740290, 8744967, 8749628, 8754276, 8758909, 8763528, 8768134, + 8772725, 8777302, 8781865, 8786415, 8790951, 8795474, 8799983, + 8804478, 8808961, 8813430, 8817886, 8822328, 8826758, 8831175, + 8835579, 8839970, 8844349, 8848715, 8853068, 8857409, 8861737, + 8866053, 8870357, 8874649, 8878928, 8883195, 8887451, 8891694, + 8895926, 8900145, 8904353, 8908550, 8912734, 8916908, 8921069, + 8925220, 8929358, 8933486, 8937603, 8941708, 8945802, 8949885, + 8953957, 8958018, 8962068, 8966108, 8970137, 8974155, 8978162, + 8982159, 8986145, 8990121, 8994086, 8998041, 9001986, 9005920, + 9009844, 9013758, 9017662, 9021556, 9025440, 9029314, 9033178, + 9037032, 9040877, 9044711, 9048536, 9052352, 9056157, 9059953, + 9063740, 9067517, 9071285, 
9075044, 9078793, 9082533, 9086263, + 9089985, 9093697, 9097400, 9101095, 9104780, 9108456, 9112123, + 9115782, 9119431, 9123072, 9126704, 9130328, 9133943, 9137549, + 9141146, 9144735, 9148316, 9151888, 9155452, 9159007, 9162554, + 9166092, 9169623, 9173145, 9176659, 9180165, 9183663, 9187152, + 9190634, 9194108, 9197573, 9201031, 9204481, 9207923, 9211357, + 9214784, 9218202, 9221613, 9225017, 9228412, 9231800, 9235181, + 9238554, 9241919, 9245277, 9248628, 9251971, 9255307, 9258635, + 9261956, 9265270, 9268577, 9271876, 9275169, 9278454, 9281732, + 9285002, 9288266, 9291523, 9294773, 9298016, 9301252, 9304481, + 9307703, 9310918, 9314126, 9317328, 9320523, 9323711, 9326892, + 9330067, 9333235, 9336397, 9339552, 9342700, 9345842, 9348977, + 9352106, 9355228, 9358344, 9361454, 9364557, 9367654, 9370744, + 9373828, 9376906, 9379978, 9383043, 9386102, 9389155, 9392202, + 9395243, 9398278, 9401306, 9404329, 9407345, 9410356, 9413360, + 9416359, 9419351, 9422338, 9425319, 9428294, 9431263, 9434226, + 9437184, 9440136, 9443082, 9446022, 9448957, 9451886, 9454809, + 9457726, 9460638, 9463545, 9466446, 9469341, 9472231, 9475115, + 9477994, 9480867, 9483735, 9486597, 9489454, 9492306, 9495152, + 9497993, 9500828, 9503659, 9506484, 9509303, 9512118, 9514927, + 9517731, 9520530, 9523324, 9526112, 9528895, 9531674, 9534447, + 9537215, 9539978, 9542736, 9545489, 9548237, 9550980, 9553718, + 9556451, 9559179, 9561903, 9564621, 9567335, 9570043, 9572747, + 9575446, 9578140, 9580830, 9583514, 9586194, 9588869, 9591540, + 9594205, 9596866, 9599523, 9602174, 9604821, 9607464, 9610101, + 9612735, 9615363, 9617987, 9620607, 9623222, 9625832, 9628438, + 9631040, 9633637, 9636229, 9638818, 9641401, 9643981, 9646556, + 9649126, 9651692, 9654254, 9656812, 9659365, 9661914, 9664459, + 9666999, 9669535, 9672067, 9674594, 9677118, 9679637, 9682152, + 9684663, 9687169, 9689672, 9692170, 9694665, 9697155, 9699641, + 9702123, 9704601, 9707075, 9709545, 9712010, 9714472, 9716930, + 9719384, 
9721834, 9724279, 9726721, 9729159, 9731593, 9734024, + 9736450, 9738872, 9741291, 9743705, 9746116, 9748523, 9750926, + 9753326, 9755721, 9758113, 9760501, 9762885, 9765266, 9767642, + 9770015, 9772385, 9774750, 9777112, 9779470, 9781825, 9784175, + 9786523, 9788866, 9791206, 9793543, 9795875, 9798204, 9800530, + 9802852, 9805170, 9807485, 9809797, 9812104, 9814409, 9816710, + 9819007, 9821301, 9823591, 9825878, 9828161, 9830441, 9832718, + 9834991, 9837261, 9839527, 9841790, 9844050, 9846306, 9848559, + 9850808, 9853054, 9855297, 9857537, 9859773, 9862006, 9864235, + 9866462, 9868685, 9870904, 9873121, 9875334, 9877544, 9879751, + 9881955, 9884155, 9886352, 9888546, 9890737, 9892925, 9895109, + 9897291, 9899469, 9901644, 9903816, 9905985, 9908150, 9910313, + 9912473, 9914629, 9916783, 9918933, 9921080, 9923225, 9925366, + 9927504, 9929639, 9931771, 9933900, 9936027, 9938150, 9940270, + 9942387, 9944502, 9946613, 9948721, 9950827, 9952929, 9955029, + 9957126, 9959219, 9961310, 9963398, 9965484, 9967566, 9969645, + 9971722, 9973796, 9975866, 9977934, 9980000, 9982062, 9984122, + 9986179, 9988233, 9990284, 9992332, 9994378, 9996421, 9998461, + 10000498, 10002533, 10004565, 10006594, 10008621, 10010644, 10012665, + 10014684, 10016700, 10018713, 10020723, 10022731, 10024736, 10026738, + 10028738, 10030735, 10032729, 10034721, 10036710, 10038697, 10040681, + 10042662, 10044641, 10046617, 10048591, 10050562, 10052530, 10054496, + 10056459, 10058420, 10060379, 10062334, 10064287, 10066238, 10068186, + 10070132, 10072075, 10074016, 10075954, 10077890, 10079823, 10081754, + 10083682, 10085608, 10087532, 10089453, 10091371, 10093287, 10095201, + 10097112, 10099021, 10100928, 10102832, 10104733, 10106633, 10108529, + 10110424, 10112316, 10114206, 10116093, 10117978, 10119861, 10121742, + 10123620, 10125495, 10127369, 10129240, 10131109, 10132975, 10134839, + 10136701, 10138561, 10140418, 10142273, 10144126, 10145976, 10147825, + 10149671, 10151514, 10153356, 10155195, 
10157032, 10158867, 10160699, + 10162530, 10164358, 10166184, 10168007, 10169829, 10171648, 10173465, + 10175280, 10177093, 10178904, 10180712, 10182519, 10184323, 10186125, + 10187925, 10189722, 10191518, 10193311, 10195103, 10196892, 10198679, + 10200464, 10202247, 10204028, 10205806, 10207583, 10209357, 10211130, + 10212900, 10214668, 10216435, 10218199, 10219961, 10221721, 10223479, + 10225235, 10226989, 10228741, 10230491, 10232239, 10233985, 10235728, + 10237470, 10239210, 10240948, 10242684, 10244417, 10246149, 10247879, + 10249607, 10251333, 10253057, 10254779, 10256499, 10258217, 10259933, + 10261647, 10263360, 10265070, 10266778, 10268485, 10270189, 10271892, + 10273593, 10275292, 10276988, 10278683, 10280376, 10282068, 10283757, + 10285444, 10287130, 10288814, 10290495, 10292175, 10293853, 10295530, + 10297204, 10298876, 10300547, 10302216, 10303883, 10305548, 10307211, + 10308873, 10310532, 10312190, 10313846, 10315501, 10317153, 10318804, + 10320452, 10322099, 10323745, 10325388, 10327030, 10328670, 10330308, + 10331944, 10333578, 10335211, 10336842, 10338472, 10340099, 10341725, + 10343349, 10344971, 10346592, 10348210, 10349828, 10351443, 10353057, + 10354668, 10356279, 10357887, 10359494, 10361099, 10362702, 10364304, + 10365904, 10367502, 10369099, 10370694, 10372287, 10373879, 10375468, + 10377057, 10378643, 10380228, 10381811, 10383393, 10384973, 10386551, + 10388128, 10389703, 10391276, 10392848, 10394418, 10395986, 10397553, + 10399118, 10400682, 10402244, 10403804, 10405363, 10406920, 10408476, + 10410030, 10411582, 10413133, 10414682, 10416230, 10417776, 10419320, + 10420863, 10422404, 10423944, 10425482, 10427019, 10428554, 10430087, + 10431619, 10433149, 10434678, 10436206, 10437731, 10439256, 10440778, + 10442299, 10443819, 10445337, 10446854, 10448369, 10449882, 10451394, + 10452905, 10454414, 10455921, 10457427, 10458932, 10460435, 10461936, + 10463436, 10464935, 10466432, 10467927, 10469422, 10470914, 10472405, + 10473895, 10475383, 
10476870, 10478355, 10479839, 10481322, 10482802, + 10484282, +}; + +static int mi_size_to_block_size(int mi_bsize, int mi_num) { + return (mi_num % mi_bsize) ? mi_num / mi_bsize + 1 : mi_num / mi_bsize; +} + +Status vp9_alloc_motion_field_info(MotionFieldInfo *motion_field_info, + int frame_num, int mi_rows, int mi_cols) { + int frame_idx, rf_idx, square_block_idx; + if (motion_field_info->allocated) { + // TODO(angiebird): Avoid re-allocate buffer if possible + vp9_free_motion_field_info(motion_field_info); + } + motion_field_info->frame_num = frame_num; + motion_field_info->motion_field_array = + vpx_calloc(frame_num, sizeof(*motion_field_info->motion_field_array)); + for (frame_idx = 0; frame_idx < frame_num; ++frame_idx) { + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES; + ++square_block_idx) { + BLOCK_SIZE bsize = square_block_idx_to_bsize(square_block_idx); + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int block_rows = mi_size_to_block_size(mi_height, mi_rows); + const int block_cols = mi_size_to_block_size(mi_width, mi_cols); + MotionField *motion_field = + &motion_field_info + ->motion_field_array[frame_idx][rf_idx][square_block_idx]; + Status status = + vp9_alloc_motion_field(motion_field, bsize, block_rows, block_cols); + if (status == STATUS_FAILED) { + return STATUS_FAILED; + } + } + } + } + motion_field_info->allocated = 1; + return STATUS_OK; +} + +Status vp9_alloc_motion_field(MotionField *motion_field, BLOCK_SIZE bsize, + int block_rows, int block_cols) { + Status status = STATUS_OK; + motion_field->ready = 0; + motion_field->bsize = bsize; + motion_field->block_rows = block_rows; + motion_field->block_cols = block_cols; + motion_field->block_num = block_rows * block_cols; + motion_field->mf = + vpx_calloc(motion_field->block_num, sizeof(*motion_field->mf)); + if (motion_field->mf == NULL) { 
+ status = STATUS_FAILED; + } + motion_field->set_mv = + vpx_calloc(motion_field->block_num, sizeof(*motion_field->set_mv)); + if (motion_field->set_mv == NULL) { + vpx_free(motion_field->mf); + motion_field->mf = NULL; + status = STATUS_FAILED; + } + motion_field->local_structure = vpx_calloc( + motion_field->block_num, sizeof(*motion_field->local_structure)); + if (motion_field->local_structure == NULL) { + vpx_free(motion_field->mf); + motion_field->mf = NULL; + vpx_free(motion_field->set_mv); + motion_field->set_mv = NULL; + status = STATUS_FAILED; + } + return status; +} + +void vp9_free_motion_field(MotionField *motion_field) { + vpx_free(motion_field->mf); + vpx_free(motion_field->set_mv); + vpx_free(motion_field->local_structure); + vp9_zero(*motion_field); +} + +void vp9_free_motion_field_info(MotionFieldInfo *motion_field_info) { + if (motion_field_info->allocated) { + int frame_idx, rf_idx, square_block_idx; + for (frame_idx = 0; frame_idx < motion_field_info->frame_num; ++frame_idx) { + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES; + ++square_block_idx) { + MotionField *motion_field = + &motion_field_info + ->motion_field_array[frame_idx][rf_idx][square_block_idx]; + vp9_free_motion_field(motion_field); + } + } + } + vpx_free(motion_field_info->motion_field_array); + motion_field_info->motion_field_array = NULL; + motion_field_info->frame_num = 0; + motion_field_info->allocated = 0; + } +} + +MotionField *vp9_motion_field_info_get_motion_field( + MotionFieldInfo *motion_field_info, int frame_idx, int rf_idx, + BLOCK_SIZE bsize) { + int square_block_idx = get_square_block_idx(bsize); + assert(frame_idx < motion_field_info->frame_num); + assert(motion_field_info->allocated == 1); + return &motion_field_info + ->motion_field_array[frame_idx][rf_idx][square_block_idx]; +} + +int vp9_motion_field_is_mv_set(const MotionField *motion_field, int brow, + int bcol) { + assert(brow 
>= 0 && brow < motion_field->block_rows); + assert(bcol >= 0 && bcol < motion_field->block_cols); + return motion_field->set_mv[brow * motion_field->block_cols + bcol]; +} + +int_mv vp9_motion_field_get_mv(const MotionField *motion_field, int brow, + int bcol) { + assert(brow >= 0 && brow < motion_field->block_rows); + assert(bcol >= 0 && bcol < motion_field->block_cols); + return motion_field->mf[brow * motion_field->block_cols + bcol]; +} + +int_mv vp9_motion_field_mi_get_mv(const MotionField *motion_field, int mi_row, + int mi_col) { + const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; + const int brow = mi_row / mi_height; + const int bcol = mi_col / mi_width; + assert(mi_row % mi_height == 0); + assert(mi_col % mi_width == 0); + return vp9_motion_field_get_mv(motion_field, brow, bcol); +} + +void vp9_motion_field_mi_set_mv(MotionField *motion_field, int mi_row, + int mi_col, int_mv mv) { + const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; + const int brow = mi_row / mi_height; + const int bcol = mi_col / mi_width; + assert(mi_row % mi_height == 0); + assert(mi_col % mi_width == 0); + assert(brow >= 0 && brow < motion_field->block_rows); + assert(bcol >= 0 && bcol < motion_field->block_cols); + motion_field->mf[brow * motion_field->block_cols + bcol] = mv; + motion_field->set_mv[brow * motion_field->block_cols + bcol] = 1; +} + +void vp9_motion_field_reset_mvs(MotionField *motion_field) { + memset(motion_field->set_mv, 0, + motion_field->block_num * sizeof(*motion_field->set_mv)); +} + +static int64_t log2_approximation(int64_t v) { + assert(v > 0); + if (v < LOG2_TABLE_SIZE) { + return log2_table[v]; + } else { + // use linear approximation when v >= 2^10 + const int slope = + 1477; // slope = 1 / (log(2) * 1024) * (1 << LOG2_PRECISION) + assert(LOG2_TABLE_SIZE == 1 << 10); + + 
return slope * (v - LOG2_TABLE_SIZE) + (10 << LOG2_PRECISION); + } +} + +int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs, + int mv_num) { + // The behavior of this function is to compute log2 of mv difference, + // i.e. min log2(1 + row_diff * row_diff + col_diff * col_diff) + // against available neighbor mvs. + // Since the log2 is monotonically increasing, we can compute + // min row_diff * row_diff + col_diff * col_diff first + // then apply log2 in the end. + int i; + int64_t min_abs_diff = INT64_MAX; + int cnt = 0; + assert(mv_num <= NB_MVS_NUM); + for (i = 0; i < mv_num; ++i) { + MV nb_mv = nb_full_mvs[i].as_mv; + const int64_t row_diff = abs(mv->row - nb_mv.row); + const int64_t col_diff = abs(mv->col - nb_mv.col); + const int64_t abs_diff = row_diff * row_diff + col_diff * col_diff; + assert(nb_full_mvs[i].as_int != INVALID_MV); + min_abs_diff = VPXMIN(abs_diff, min_abs_diff); + ++cnt; + } + if (cnt) { + return log2_approximation(1 + min_abs_diff); + } + return 0; +} + +static FloatMV get_smooth_motion_vector(const FloatMV scaled_search_mv, + const FloatMV *tmp_mf, + const int (*M)[MF_LOCAL_STRUCTURE_SIZE], + int rows, int cols, int row, int col, + float alpha) { + const FloatMV tmp_mv = tmp_mf[row * cols + col]; + int idx_row, idx_col; + FloatMV avg_nb_mv = { 0.0f, 0.0f }; + FloatMV mv = { 0.0f, 0.0f }; + float filter[3][3] = { { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f }, + { 1.0f / 6.0f, 0.0f, 1.0f / 6.0f }, + { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f } }; + for (idx_row = 0; idx_row < 3; ++idx_row) { + int nb_row = row + idx_row - 1; + for (idx_col = 0; idx_col < 3; ++idx_col) { + int nb_col = col + idx_col - 1; + if (nb_row < 0 || nb_col < 0 || nb_row >= rows || nb_col >= cols) { + avg_nb_mv.row += (tmp_mv.row) * filter[idx_row][idx_col]; + avg_nb_mv.col += (tmp_mv.col) * filter[idx_row][idx_col]; + } else { + const FloatMV nb_mv = tmp_mf[nb_row * cols + nb_col]; + avg_nb_mv.row += (nb_mv.row) * filter[idx_row][idx_col]; + 
avg_nb_mv.col += (nb_mv.col) * filter[idx_row][idx_col]; + } + } + } + { + // M is the local variance of reference frame + float M00 = M[row * cols + col][0]; + float M01 = M[row * cols + col][1]; + float M10 = M[row * cols + col][2]; + float M11 = M[row * cols + col][3]; + + float det = (M00 + alpha) * (M11 + alpha) - M01 * M10; + + float inv_M00 = (M11 + alpha) / det; + float inv_M01 = -M01 / det; + float inv_M10 = -M10 / det; + float inv_M11 = (M00 + alpha) / det; + + float inv_MM00 = inv_M00 * M00 + inv_M01 * M10; + float inv_MM01 = inv_M00 * M01 + inv_M01 * M11; + float inv_MM10 = inv_M10 * M00 + inv_M11 * M10; + float inv_MM11 = inv_M10 * M01 + inv_M11 * M11; + + mv.row = inv_M00 * avg_nb_mv.row * alpha + inv_M01 * avg_nb_mv.col * alpha + + inv_MM00 * scaled_search_mv.row + inv_MM01 * scaled_search_mv.col; + mv.col = inv_M10 * avg_nb_mv.row * alpha + inv_M11 * avg_nb_mv.col * alpha + + inv_MM10 * scaled_search_mv.row + inv_MM11 * scaled_search_mv.col; + } + return mv; +} + +void vp9_get_smooth_motion_field(const MV *search_mf, + const int (*M)[MF_LOCAL_STRUCTURE_SIZE], + int rows, int cols, BLOCK_SIZE bsize, + float alpha, int num_iters, MV *smooth_mf) { + // M is the local variation of reference frame + // build two buffers + FloatMV *input = (FloatMV *)malloc(rows * cols * sizeof(FloatMV)); + FloatMV *output = (FloatMV *)malloc(rows * cols * sizeof(FloatMV)); + int idx; + int row, col; + int bw = 4 << b_width_log2_lookup[bsize]; + int bh = 4 << b_height_log2_lookup[bsize]; + // copy search results to input buffer + for (idx = 0; idx < rows * cols; ++idx) { + input[idx].row = (float)search_mf[idx].row / bh; + input[idx].col = (float)search_mf[idx].col / bw; + } + for (idx = 0; idx < num_iters; ++idx) { + FloatMV *tmp; + for (row = 0; row < rows; ++row) { + for (col = 0; col < cols; ++col) { + // note: the scaled_search_mf and smooth_mf are all scaled by macroblock + // size + const MV search_mv = search_mf[row * cols + col]; + FloatMV scaled_search_mv = { 
(float)search_mv.row / bh, + (float)search_mv.col / bw }; + output[row * cols + col] = get_smooth_motion_vector( + scaled_search_mv, input, M, rows, cols, row, col, alpha); + } + } + // swap buffers + tmp = input; + input = output; + output = tmp; + } + // copy smoothed results to output + for (idx = 0; idx < rows * cols; ++idx) { + smooth_mf[idx].row = (int)(input[idx].row * bh); + smooth_mf[idx].col = (int)(input[idx].col * bw); + } + free(input); + free(output); +} + +void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame, + const YV12_BUFFER_CONFIG *ref_frame, + const MV *search_mf, + const vp9_variance_fn_ptr_t *fn_ptr, int rows, + int cols, BLOCK_SIZE bsize, + int (*M)[MF_LOCAL_STRUCTURE_SIZE]) { + const int bw = 4 << b_width_log2_lookup[bsize]; + const int bh = 4 << b_height_log2_lookup[bsize]; + const int cur_stride = cur_frame->y_stride; + const int ref_stride = ref_frame->y_stride; + const int width = ref_frame->y_width; + const int height = ref_frame->y_height; + int row, col; + for (row = 0; row < rows; ++row) { + for (col = 0; col < cols; ++col) { + int cur_offset = row * bh * cur_stride + col * bw; + uint8_t *center = cur_frame->y_buffer + cur_offset; + int ref_h = row * bh + search_mf[row * cols + col].row; + int ref_w = col * bw + search_mf[row * cols + col].col; + int ref_offset; + uint8_t *target; + uint8_t *nb; + int search_dist; + int nb_dist; + int I_row = 0, I_col = 0; + // TODO(Dan): handle the case that when reference frame block beyond the + // boundary + ref_h = ref_h < 0 ? 0 : (ref_h >= height - bh ? height - bh - 1 : ref_h); + ref_w = ref_w < 0 ? 0 : (ref_w >= width - bw ? 
width - bw - 1 : ref_w); + // compute search results distortion + // TODO(Dan): maybe need to use vp9 function to find the reference block, + // to compare with the results of my python code, I first use my way to + // compute the reference block + ref_offset = ref_h * ref_stride + ref_w; + target = ref_frame->y_buffer + ref_offset; + search_dist = fn_ptr->sdf(center, cur_stride, target, ref_stride); + // compute target's neighbors' distortions + // TODO(Dan): if using padding, the boundary condition may vary + // up + if (ref_h - bh >= 0) { + nb = target - ref_stride * bh; + nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride); + I_row += nb_dist - search_dist; + } + // down + if (ref_h + bh < height - bh) { + nb = target + ref_stride * bh; + nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride); + I_row += nb_dist - search_dist; + } + if (ref_h - bh >= 0 && ref_h + bh < height - bh) { + I_row /= 2; + } + I_row /= (bw * bh); + // left + if (ref_w - bw >= 0) { + nb = target - bw; + nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride); + I_col += nb_dist - search_dist; + } + // right + if (ref_w + bw < width - bw) { + nb = target + bw; + nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride); + I_col += nb_dist - search_dist; + } + if (ref_w - bw >= 0 && ref_w + bw < width - bw) { + I_col /= 2; + } + I_col /= (bw * bh); + M[row * cols + col][0] = I_row * I_row; + M[row * cols + col][1] = I_row * I_col; + M[row * cols + col][2] = I_col * I_row; + M[row * cols + col][3] = I_col * I_col; + } + } +} diff --git a/libvpx/vp9/encoder/vp9_non_greedy_mv.h b/libvpx/vp9/encoder/vp9_non_greedy_mv.h new file mode 100644 index 000000000..c2bd69722 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_non_greedy_mv.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_ +#define VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_ + +#include "vp9/common/vp9_enums.h" +#include "vp9/common/vp9_blockd.h" +#include "vpx_scale/yv12config.h" +#include "vpx_dsp/variance.h" + +#ifdef __cplusplus +extern "C" { +#endif +#define NB_MVS_NUM 4 +#define LOG2_PRECISION 20 +#define MF_LOCAL_STRUCTURE_SIZE 4 +#define SQUARE_BLOCK_SIZES 4 + +typedef enum Status { STATUS_OK = 0, STATUS_FAILED = 1 } Status; + +typedef struct MotionField { + int ready; + BLOCK_SIZE bsize; + int block_rows; + int block_cols; + int block_num; // block_num == block_rows * block_cols + int (*local_structure)[MF_LOCAL_STRUCTURE_SIZE]; + int_mv *mf; + int *set_mv; + int mv_log_scale; +} MotionField; + +typedef struct MotionFieldInfo { + int frame_num; + int allocated; + MotionField (*motion_field_array)[MAX_INTER_REF_FRAMES][SQUARE_BLOCK_SIZES]; +} MotionFieldInfo; + +typedef struct { + float row, col; +} FloatMV; + +static INLINE int get_square_block_idx(BLOCK_SIZE bsize) { + if (bsize == BLOCK_4X4) { + return 0; + } + if (bsize == BLOCK_8X8) { + return 1; + } + if (bsize == BLOCK_16X16) { + return 2; + } + if (bsize == BLOCK_32X32) { + return 3; + } + assert(0 && "ERROR: non-square block size"); + return -1; +} + +static INLINE BLOCK_SIZE square_block_idx_to_bsize(int square_block_idx) { + if (square_block_idx == 0) { + return BLOCK_4X4; + } + if (square_block_idx == 1) { + return BLOCK_8X8; + } + if (square_block_idx == 2) { + return BLOCK_16X16; + } + if (square_block_idx == 3) { + return BLOCK_32X32; + } + assert(0 && "ERROR: invalid square_block_idx"); + return BLOCK_INVALID; +} + +Status vp9_alloc_motion_field_info(MotionFieldInfo *motion_field_info, + int frame_num, int mi_rows, int mi_cols); + +Status vp9_alloc_motion_field(MotionField 
*motion_field, BLOCK_SIZE bsize, + int block_rows, int block_cols); + +void vp9_free_motion_field(MotionField *motion_field); + +void vp9_free_motion_field_info(MotionFieldInfo *motion_field_info); + +int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs, + int mv_num); + +void vp9_get_smooth_motion_field(const MV *search_mf, + const int (*M)[MF_LOCAL_STRUCTURE_SIZE], + int rows, int cols, BLOCK_SIZE bize, + float alpha, int num_iters, MV *smooth_mf); + +void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame, + const YV12_BUFFER_CONFIG *ref_frame, + const MV *search_mf, + const vp9_variance_fn_ptr_t *fn_ptr, int rows, + int cols, BLOCK_SIZE bsize, + int (*M)[MF_LOCAL_STRUCTURE_SIZE]); + +MotionField *vp9_motion_field_info_get_motion_field( + MotionFieldInfo *motion_field_info, int frame_idx, int rf_idx, + BLOCK_SIZE bsize); + +void vp9_motion_field_mi_set_mv(MotionField *motion_field, int mi_row, + int mi_col, int_mv mv); + +void vp9_motion_field_reset_mvs(MotionField *motion_field); + +int_mv vp9_motion_field_get_mv(const MotionField *motion_field, int brow, + int bcol); +int_mv vp9_motion_field_mi_get_mv(const MotionField *motion_field, int mi_row, + int mi_col); +int vp9_motion_field_is_mv_set(const MotionField *motion_field, int brow, + int bcol); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_ diff --git a/libvpx/vp9/encoder/vp9_pickmode.c b/libvpx/vp9/encoder/vp9_pickmode.c index 513b9f678..9b2e48505 100644 --- a/libvpx/vp9/encoder/vp9_pickmode.c +++ b/libvpx/vp9/encoder/vp9_pickmode.c @@ -1501,7 +1501,8 @@ static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, int best_early_term = 0; int best_flag_preduv_computed[2] = { 0 }; INTERP_FILTER filter_start = force_smooth_filter ? 
EIGHTTAP_SMOOTH : EIGHTTAP; - for (filter = filter_start; filter <= EIGHTTAP_SMOOTH; ++filter) { + INTERP_FILTER filter_end = EIGHTTAP_SMOOTH; + for (filter = filter_start; filter <= filter_end; ++filter) { int64_t cost; mi->interp_filter = filter; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); @@ -1531,9 +1532,11 @@ static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, free_pred_buffer(*this_mode_pred); *this_mode_pred = current_pred; } - current_pred = &tmp[get_pred_buffer(tmp, 3)]; - pd->dst.buf = current_pred->data; - pd->dst.stride = bw; + if (filter != filter_end) { + current_pred = &tmp[get_pred_buffer(tmp, 3)]; + pd->dst.buf = current_pred->data; + pd->dst.stride = bw; + } } } } @@ -1554,6 +1557,9 @@ static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, if (reuse_inter_pred) { pd->dst.buf = (*this_mode_pred)->data; pd->dst.stride = (*this_mode_pred)->stride; + } else if (best_filter < filter_end) { + mi->interp_filter = best_filter; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); } } @@ -1713,9 +1719,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // process. // tmp[3] points to dst buffer, and the other 3 point to allocated buffers. 
PRED_BUFFER tmp[4]; - DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64] VPX_UNINITIALIZED); #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]); + DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64] VPX_UNINITIALIZED); #endif struct buf_2d orig_dst = pd->dst; PRED_BUFFER *this_mode_pred = NULL; @@ -2554,6 +2560,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize])) continue; + if (cpi->sf.rt_intra_dc_only_low_content && this_mode != DC_PRED && + x->content_state_sb != kVeryHighSad) + continue; + if ((cpi->sf.adaptive_rd_thresh_row_mt && rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh, &rd_thresh_freq_fact[mode_index])) || diff --git a/libvpx/vp9/encoder/vp9_ratectrl.c b/libvpx/vp9/encoder/vp9_ratectrl.c index 6745b0adf..cbafbf7b9 100644 --- a/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/libvpx/vp9/encoder/vp9_ratectrl.c @@ -436,7 +436,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->use_post_encode_drop = 0; rc->ext_use_post_encode_drop = 0; rc->arf_active_best_quality_adjustment_factor = 1.0; - + rc->arf_increase_active_best_quality = 0; rc->preserve_arf_as_gld = 0; rc->preserve_next_arf_as_gld = 0; rc->show_arf_as_gld = 0; @@ -504,7 +504,7 @@ static int check_buffer_below_thresh(VP9_COMP *cpi, int drop_mark) { } } -static int drop_frame(VP9_COMP *cpi) { +int vp9_test_drop(VP9_COMP *cpi) { const VP9EncoderConfig *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; SVC *svc = &cpi->svc; @@ -609,13 +609,15 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) { SVC *svc = &cpi->svc; int svc_prev_layer_dropped = 0; // In the constrained or full_superframe framedrop mode for svc - // (framedrop_mode != LAYER_DROP), if the previous spatial layer was - // dropped, drop the current spatial layer. 
+ // (framedrop_mode is neither LAYER_DROP nor CONSTRAINED_FROM_ABOVE_DROP), + // if the previous spatial layer was dropped, drop the current spatial layer. if (cpi->use_svc && svc->spatial_layer_id > 0 && svc->drop_spatial_layer[svc->spatial_layer_id - 1]) svc_prev_layer_dropped = 1; - if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP) || - drop_frame(cpi)) { + if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP && + svc->framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP) || + svc->force_drop_constrained_from_above[svc->spatial_layer_id] || + vp9_test_drop(cpi)) { vp9_rc_postencode_update_drop_frame(cpi); cpi->ext_refresh_frame_flags_pending = 0; cpi->last_frame_dropped = 1; @@ -625,14 +627,17 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) { svc->drop_count[svc->spatial_layer_id]++; svc->skip_enhancement_layer = 1; if (svc->framedrop_mode == LAYER_DROP || + (svc->framedrop_mode == CONSTRAINED_FROM_ABOVE_DROP && + svc->force_drop_constrained_from_above[svc->number_spatial_layers - + 1] == 0) || svc->drop_spatial_layer[0] == 0) { - // For the case of constrained drop mode where the base is dropped - // (drop_spatial_layer[0] == 1), which means full superframe dropped, - // we don't increment the svc frame counters. In particular temporal - // layer counter (which is incremented in vp9_inc_frame_in_layer()) - // won't be incremented, so on a dropped frame we try the same - // temporal_layer_id on next incoming frame. This is to avoid an - // issue with temporal alignement with full superframe dropping. + // For the case of constrained drop mode where full superframe is + // dropped, we don't increment the svc frame counters. + // In particular temporal layer counter (which is incremented in + // vp9_inc_frame_in_layer()) won't be incremented, so on a dropped + // frame we try the same temporal_layer_id on next incoming frame. + // This is to avoid an issue with temporal alignment with full + // superframe dropping. 
vp9_inc_frame_in_layer(cpi); } if (svc->spatial_layer_id == svc->number_spatial_layers - 1) { @@ -1420,8 +1425,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, int active_worst_quality = cpi->twopass.active_worst_quality; int q; int *inter_minq; - int arf_active_best_quality_adjustment, arf_active_best_quality_max; - int *arfgf_high_motion_minq; + int arf_active_best_quality_hl; + int *arfgf_high_motion_minq, *arfgf_low_motion_minq; const int boost_frame = !rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame); @@ -1448,14 +1453,20 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, if (q < cq_level) q = cq_level; } active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth); + arf_active_best_quality_hl = active_best_quality; - ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_high_motion_minq); - arf_active_best_quality_max = arfgf_high_motion_minq[q]; - arf_active_best_quality_adjustment = - arf_active_best_quality_max - active_best_quality; - active_best_quality = arf_active_best_quality_max - - (int)(arf_active_best_quality_adjustment * - rc->arf_active_best_quality_adjustment_factor); + if (rc->arf_increase_active_best_quality == 1) { + ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_high_motion_minq); + arf_active_best_quality_hl = arfgf_high_motion_minq[q]; + } else if (rc->arf_increase_active_best_quality == -1) { + ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_low_motion_minq); + arf_active_best_quality_hl = arfgf_low_motion_minq[q]; + } + active_best_quality = + (int)((double)active_best_quality * + rc->arf_active_best_quality_adjustment_factor + + (double)arf_active_best_quality_hl * + (1.0 - rc->arf_active_best_quality_adjustment_factor)); // Modify best quality for second level arfs. For mode VPX_Q this // becomes the baseline frame q. 
@@ -1480,17 +1491,30 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, // Extension to max or min Q if undershoot or overshoot is outside // the permitted range. if (frame_is_intra_only(cm) || boost_frame) { + const int layer_depth = gf_group->layer_depth[gf_group_index]; active_best_quality -= (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast); active_worst_quality += (cpi->twopass.extend_maxq / 2); + + if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) { + assert(layer_depth > 1); + active_best_quality = + VPXMAX(active_best_quality, + cpi->twopass.last_qindex_of_arf_layer[layer_depth - 1]); + } } else { + const int max_layer_depth = gf_group->max_layer_depth; + assert(max_layer_depth > 0); + active_best_quality -= (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2; active_worst_quality += cpi->twopass.extend_maxq; // For normal frames do not allow an active minq lower than the q used for // the last boosted frame. - active_best_quality = VPXMAX(active_best_quality, rc->last_boosted_qindex); + active_best_quality = + VPXMAX(active_best_quality, + cpi->twopass.last_qindex_of_arf_layer[max_layer_depth - 1]); } #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY @@ -1789,6 +1813,9 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { RATE_CONTROL *const rc = &cpi->rc; SVC *const svc = &cpi->svc; const int qindex = cm->base_qindex; + const GF_GROUP *gf_group = &cpi->twopass.gf_group; + const int gf_group_index = cpi->twopass.gf_group.index; + const int layer_depth = gf_group->layer_depth[gf_group_index]; // Update rate control heuristics rc->projected_frame_size = (int)(bytes_used << 3); @@ -1843,6 +1870,15 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) { rc->last_boosted_qindex = qindex; } + + if ((qindex < cpi->twopass.last_qindex_of_arf_layer[layer_depth]) || + (cm->frame_type == KEY_FRAME) || + (!rc->constrained_gf_group && 
+ (cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) { + cpi->twopass.last_qindex_of_arf_layer[layer_depth] = qindex; + } + if (frame_is_intra_only(cm)) rc->last_kf_qindex = qindex; update_buffer_level_postencode(cpi, rc->projected_frame_size); @@ -2441,12 +2477,23 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi, // Set Maximum gf/arf interval rc->max_gf_interval = oxcf->max_gf_interval; rc->min_gf_interval = oxcf->min_gf_interval; +#if CONFIG_RATE_CTRL + if (rc->min_gf_interval == 0) { + rc->min_gf_interval = vp9_rc_get_default_min_gf_interval( + oxcf->width, oxcf->height, oxcf->init_framerate); + } + if (rc->max_gf_interval == 0) { + rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( + oxcf->init_framerate, rc->min_gf_interval); + } +#else if (rc->min_gf_interval == 0) rc->min_gf_interval = vp9_rc_get_default_min_gf_interval( oxcf->width, oxcf->height, cpi->framerate); if (rc->max_gf_interval == 0) rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( cpi->framerate, rc->min_gf_interval); +#endif // Extended max interval for genuinely static scenes like slide shows. rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH; diff --git a/libvpx/vp9/encoder/vp9_ratectrl.h b/libvpx/vp9/encoder/vp9_ratectrl.h index 09d69e4d4..7dbe17dc5 100644 --- a/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/libvpx/vp9/encoder/vp9_ratectrl.h @@ -198,7 +198,7 @@ typedef struct { int damped_adjustment[RATE_FACTOR_LEVELS]; double arf_active_best_quality_adjustment_factor; - int arf_active_best_quality_adjustment_window; + int arf_increase_active_best_quality; int preserve_arf_as_gld; int preserve_next_arf_as_gld; @@ -267,6 +267,8 @@ void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi); // Post encode drop for CBR mode. int post_encode_drop_cbr(struct VP9_COMP *cpi, size_t *size); +int vp9_test_drop(struct VP9_COMP *cpi); + // Decide if we should drop this frame: For 1-pass CBR. 
// Changes only the decimation count in the rate control structure int vp9_rc_drop_frame(struct VP9_COMP *cpi); diff --git a/libvpx/vp9/encoder/vp9_rd.h b/libvpx/vp9/encoder/vp9_rd.h index df6ea9094..908989c07 100644 --- a/libvpx/vp9/encoder/vp9_rd.h +++ b/libvpx/vp9/encoder/vp9_rd.h @@ -38,8 +38,6 @@ extern "C" { #define MV_COST_WEIGHT 108 #define MV_COST_WEIGHT_SUB 120 -#define INVALID_MV 0x80008000 - #define MAX_MODES 30 #define MAX_REFS 6 diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c index d07d91774..fa7472ca6 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.c +++ b/libvpx/vp9/encoder/vp9_rdopt.c @@ -2494,19 +2494,19 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, const int ph = num_4x4_blocks_high_lookup[bsize] << 2; MV pred_mv[3]; + int bestsme = INT_MAX; #if CONFIG_NON_GREEDY_MV - double bestsme; - int_mv nb_full_mvs[NB_MVS_NUM]; - const int nb_full_mv_num = NB_MVS_NUM; int gf_group_idx = cpi->twopass.gf_group.index; int gf_rf_idx = ref_frame_to_gf_rf_idx(ref); BLOCK_SIZE square_bsize = get_square_block_size(bsize); + int_mv nb_full_mvs[NB_MVS_NUM] = { 0 }; + MotionField *motion_field = vp9_motion_field_info_get_motion_field( + &cpi->motion_field_info, gf_group_idx, gf_rf_idx, square_bsize); + const int nb_full_mv_num = + vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs); const int lambda = (pw * ph) / 4; assert(pw * ph == lambda << 2); - vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col, - gf_rf_idx, square_bsize, nb_full_mvs); #else // CONFIG_NON_GREEDY_MV - int bestsme = INT_MAX; int sadpb = x->sadperbit16; #endif // CONFIG_NON_GREEDY_MV @@ -2580,9 +2580,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, mvp_full.row >>= 3; #if CONFIG_NON_GREEDY_MV - bestsme = vp9_full_pixel_diamond_new(cpi, x, &mvp_full, step_param, lambda, 1, - &cpi->fn_ptr[bsize], nb_full_mvs, - nb_full_mv_num, &tmp_mv->as_mv); + bestsme = 
vp9_full_pixel_diamond_new(cpi, x, bsize, &mvp_full, step_param, + lambda, 1, nb_full_mvs, nb_full_mv_num, + &tmp_mv->as_mv); #else // CONFIG_NON_GREEDY_MV bestsme = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb, @@ -2592,11 +2592,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, if (cpi->sf.enhanced_full_pixel_motion_search) { int i; for (i = 0; i < 3; ++i) { -#if CONFIG_NON_GREEDY_MV - double this_me; -#else // CONFIG_NON_GREEDY_MV int this_me; -#endif // CONFIG_NON_GREEDY_MV MV this_mv; int diff_row; int diff_col; @@ -2622,9 +2618,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, mvp_full.row >>= 3; #if CONFIG_NON_GREEDY_MV this_me = vp9_full_pixel_diamond_new( - cpi, x, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), - lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num, - &this_mv); + cpi, x, bsize, &mvp_full, + VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), lambda, 1, nb_full_mvs, + nb_full_mv_num, &this_mv); #else // CONFIG_NON_GREEDY_MV this_me = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, @@ -2678,8 +2674,7 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, // However, once established that vector may be usable through the nearest and // near mv modes to reduce distortion in subsequent blocks and also improve // visual quality. 
-static int discount_newmv_test(const VP9_COMP *cpi, int this_mode, - int_mv this_mv, +static int discount_newmv_test(VP9_COMP *cpi, int this_mode, int_mv this_mv, int_mv (*mode_mv)[MAX_REF_FRAMES], int ref_frame, int mi_row, int mi_col, BLOCK_SIZE bsize) { #if CONFIG_NON_GREEDY_MV @@ -2689,6 +2684,8 @@ static int discount_newmv_test(const VP9_COMP *cpi, int this_mode, const int gf_group_idx = cpi->twopass.gf_group.index; const int gf_rf_idx = ref_frame_to_gf_rf_idx(ref_frame); const TplDepFrame tpl_frame = cpi->tpl_stats[gf_group_idx]; + const MotionField *motion_field = vp9_motion_field_info_get_motion_field( + &cpi->motion_field_info, gf_group_idx, gf_rf_idx, cpi->tpl_bsize); const int tpl_block_mi_h = num_8x8_blocks_high_lookup[cpi->tpl_bsize]; const int tpl_block_mi_w = num_8x8_blocks_wide_lookup[cpi->tpl_bsize]; const int tpl_mi_row = mi_row - (mi_row % tpl_block_mi_h); @@ -2697,8 +2694,8 @@ static int discount_newmv_test(const VP9_COMP *cpi, int this_mode, tpl_frame .mv_mode_arr[gf_rf_idx][tpl_mi_row * tpl_frame.stride + tpl_mi_col]; if (mv_mode == NEW_MV_MODE) { - int_mv tpl_new_mv = *get_pyramid_mv(&tpl_frame, gf_rf_idx, cpi->tpl_bsize, - tpl_mi_row, tpl_mi_col); + int_mv tpl_new_mv = + vp9_motion_field_mi_get_mv(motion_field, tpl_mi_row, tpl_mi_col); int row_diff = abs(tpl_new_mv.as_mv.row - this_mv.as_mv.row); int col_diff = abs(tpl_new_mv.as_mv.col - this_mv.as_mv.col); if (VPXMAX(row_diff, col_diff) <= 8) { @@ -3455,7 +3452,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, if (cpi->rc.is_src_frame_alt_ref) { if (sf->alt_ref_search_fp) { mode_skip_mask[ALTREF_FRAME] = 0; - ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME); + ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME) & 0xff; ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; } } diff --git a/libvpx/vp9/encoder/vp9_speed_features.c b/libvpx/vp9/encoder/vp9_speed_features.c index 529dca040..0b24b5cb3 100644 --- a/libvpx/vp9/encoder/vp9_speed_features.c +++ 
b/libvpx/vp9/encoder/vp9_speed_features.c @@ -456,6 +456,7 @@ static void set_rt_speed_feature_framesize_independent( sf->variance_part_thresh_mult = 1; sf->cb_pred_filter_search = 0; sf->force_smooth_interpol = 0; + sf->rt_intra_dc_only_low_content = 0; if (speed >= 1) { sf->allow_txfm_domain_distortion = 1; @@ -535,13 +536,6 @@ static void set_rt_speed_feature_framesize_independent( int i; if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0) sf->use_altref_onepass = 1; - sf->last_partitioning_redo_frequency = 4; - sf->adaptive_rd_thresh = 5; - sf->use_fast_coef_costing = 0; - sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; - sf->adjust_partitioning_from_last_frame = - cm->last_frame_type != cm->frame_type || - (0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency); sf->mv.subpel_force_stop = QUARTER_PEL; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_DC_H_V; @@ -550,13 +544,19 @@ static void set_rt_speed_feature_framesize_independent( sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->frame_parameter_update = 0; sf->mv.search_method = FAST_HEX; - - sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW; - sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST; - sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST; - sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST; + sf->allow_skip_recode = 0; sf->max_intra_bsize = BLOCK_32X32; - sf->allow_skip_recode = 1; + sf->use_fast_coef_costing = 0; + sf->use_quant_fp = !is_keyframe; + sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; + sf->adaptive_rd_thresh = 2; + sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH; + sf->tx_size_search_method = is_keyframe ? 
USE_LARGESTALL : USE_TX_8X8; + sf->partition_search_type = VAR_BASED_PARTITION; } if (speed >= 5) { @@ -740,12 +740,7 @@ static void set_rt_speed_feature_framesize_independent( sf->nonrd_use_ml_partition = 0; #endif if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = HALF_PEL; - // Only keep INTRA_DC mode for speed 8. - if (!is_keyframe) { - int i = 0; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->intra_y_mode_bsize_mask[i] = INTRA_DC; - } + sf->rt_intra_dc_only_low_content = 1; if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && content != VP9E_CONTENT_SCREEN) { // More aggressive short circuit for speed 8. @@ -771,6 +766,12 @@ static void set_rt_speed_feature_framesize_independent( } if (speed >= 9) { + // Only keep INTRA_DC mode for speed 9. + if (!is_keyframe) { + int i = 0; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->intra_y_mode_bsize_mask[i] = INTRA_DC; + } sf->cb_pred_filter_search = 1; sf->mv.enable_adaptive_subpel_force_stop = 1; sf->mv.adapt_subpel_force_stop.mv_thresh = 1; @@ -817,7 +818,7 @@ static void set_rt_speed_feature_framesize_independent( } // TODO(marpan): There is regression for aq-mode=3 speed <= 4, force it // off for now. - if (speed <= 4 && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + if (speed <= 3 && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) cpi->oxcf.aq_mode = 0; } diff --git a/libvpx/vp9/encoder/vp9_speed_features.h b/libvpx/vp9/encoder/vp9_speed_features.h index eb0628199..ca284ded8 100644 --- a/libvpx/vp9/encoder/vp9_speed_features.h +++ b/libvpx/vp9/encoder/vp9_speed_features.h @@ -608,6 +608,10 @@ typedef struct SPEED_FEATURES { // Force subpel motion filter to always use SMOOTH_FILTER. int force_smooth_interpol; + + // For real-time mode: force DC only under intra search when content + // does not have high souce SAD. 
+ int rt_intra_dc_only_low_content; } SPEED_FEATURES; struct VP9_COMP; diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.c b/libvpx/vp9/encoder/vp9_svc_layercontext.c index 8ba113bf3..32ee6e064 100644 --- a/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -57,8 +57,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->simulcast_mode = 0; for (i = 0; i < REF_FRAMES; ++i) { - svc->fb_idx_spatial_layer_id[i] = -1; - svc->fb_idx_temporal_layer_id[i] = -1; + svc->fb_idx_spatial_layer_id[i] = 0xff; + svc->fb_idx_temporal_layer_id[i] = 0xff; svc->fb_idx_base[i] = 0; } for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { @@ -74,6 +74,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->fb_idx_upd_tl0[sl] = -1; svc->drop_count[sl] = 0; svc->spatial_layer_sync[sl] = 0; + svc->force_drop_constrained_from_above[sl] = 0; } svc->max_consec_drop = INT_MAX; @@ -770,6 +771,32 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { svc->mi_rows[svc->spatial_layer_id] = cpi->common.mi_rows; svc->mi_cols[svc->spatial_layer_id] = cpi->common.mi_cols; + // For constrained_from_above drop mode: before encoding superframe (i.e., + // at SL0 frame) check all spatial layers (starting from top) for possible + // drop, and if so, set a flag to force drop of that layer and all its lower + // layers. + if (svc->spatial_layer_to_encode == svc->first_spatial_layer_to_encode) { + int sl; + for (sl = 0; sl < svc->number_spatial_layers; sl++) + svc->force_drop_constrained_from_above[sl] = 0; + if (svc->framedrop_mode == CONSTRAINED_FROM_ABOVE_DROP) { + for (sl = svc->number_spatial_layers - 1; + sl >= svc->first_spatial_layer_to_encode; sl--) { + int layer = sl * svc->number_temporal_layers + svc->temporal_layer_id; + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + cpi->rc = lc->rc; + cpi->oxcf.target_bandwidth = lc->target_bandwidth; + if (vp9_test_drop(cpi)) { + int sl2; + // Set flag to force drop in encoding for this mode. 
+ for (sl2 = sl; sl2 >= svc->first_spatial_layer_to_encode; sl2--) + svc->force_drop_constrained_from_above[sl2] = 1; + break; + } + } + } + } + if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { set_flags_and_fb_idx_for_temporal_mode3(cpi); } else if (svc->temporal_layering_mode == diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.h b/libvpx/vp9/encoder/vp9_svc_layercontext.h index 77d438266..f1ba77970 100644 --- a/libvpx/vp9/encoder/vp9_svc_layercontext.h +++ b/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -58,7 +58,6 @@ typedef struct { int gold_ref_idx; int has_alt_frame; size_t layer_size; - struct vpx_psnr_pkt psnr_pkt; // Cyclic refresh parameters (aq-mode=3), that need to be updated per-frame. // TODO(jianj/marpan): Is it better to use the full cyclic refresh struct. int sb_index; @@ -138,6 +137,7 @@ typedef struct SVC { int drop_spatial_layer[VPX_MAX_LAYERS]; int framedrop_thresh[VPX_MAX_LAYERS]; int drop_count[VPX_MAX_LAYERS]; + int force_drop_constrained_from_above[VPX_MAX_LAYERS]; int max_consec_drop; SVC_LAYER_DROP_MODE framedrop_mode; diff --git a/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c index aa46c5889..4be6a5ea0 100644 --- a/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c +++ b/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c @@ -114,7 +114,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, // Work out the start point for the search const uint8_t *best_address = in_what; const uint8_t *new_best_address = best_address; -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 __m128i v_ba_q = _mm_set1_epi64x((intptr_t)best_address); #else __m128i v_ba_d = _mm_set1_epi32((intptr_t)best_address); @@ -138,7 +138,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, for (i = 0, step = 0; step < tot_steps; step++) { for (j = 0; j < cfg->searches_per_step; j += 4, i += 4) { __m128i v_sad_d, v_cost_d, v_outside_d, v_inside_d, v_diff_mv_w; -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 
__m128i v_blocka[2]; #else __m128i v_blocka[1]; @@ -175,7 +175,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, // Compute the SIMD pointer offsets. { -#if ARCH_X86_64 // sizeof(intptr_t) == 8 +#if VPX_ARCH_X86_64 // sizeof(intptr_t) == 8 // Load the offsets __m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]); __m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]); @@ -186,7 +186,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, // Compute the candidate addresses v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q); v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q); -#else // ARCH_X86 // sizeof(intptr_t) == 4 +#else // VPX_ARCH_X86 // sizeof(intptr_t) == 4 __m128i v_bo_d = _mm_loadu_si128((const __m128i *)&ss_os[i]); v_bo_d = _mm_and_si128(v_bo_d, v_inside_d); v_blocka[0] = _mm_add_epi32(v_ba_d, v_bo_d); @@ -294,7 +294,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, best_address = new_best_address; v_bmv_w = _mm_set1_epi32(bmv.as_int); -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 v_ba_q = _mm_set1_epi64x((intptr_t)best_address); #else v_ba_d = _mm_set1_epi32((intptr_t)best_address); diff --git a/libvpx/vp9/encoder/x86/vp9_error_sse2.asm b/libvpx/vp9/encoder/x86/vp9_error_sse2.asm index 11d473b2d..7beec130a 100644 --- a/libvpx/vp9/encoder/x86/vp9_error_sse2.asm +++ b/libvpx/vp9/encoder/x86/vp9_error_sse2.asm @@ -58,7 +58,7 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz movhlps m7, m6 paddq m4, m5 paddq m6, m7 -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 movq rax, m4 movq [sszq], m6 %else @@ -105,7 +105,7 @@ cglobal block_error_fp, 3, 3, 6, uqc, dqc, size ; accumulate horizontally and store in return value movhlps m5, m4 paddq m4, m5 -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 movq rax, m4 %else pshufd m5, m4, 0x1 diff --git a/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c index 885220a71..e3d803b8f 100644 --- a/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c +++ 
b/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c @@ -25,7 +25,7 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *iscan) { __m128i zero; __m128i thr; - int16_t nzflag; + int nzflag; __m128i eob; __m128i round, quant, dequant; diff --git a/libvpx/vp9/simple_encode.cc b/libvpx/vp9/simple_encode.cc new file mode 100644 index 000000000..6a35eb6bc --- /dev/null +++ b/libvpx/vp9/simple_encode.cc @@ -0,0 +1,313 @@ +#include <vector> +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/vp9_iface_common.h" +#include "vp9/encoder/vp9_encoder.h" +#include "vp9/encoder/vp9_firstpass.h" +#include "vp9/simple_encode.h" +#include "vp9/vp9_cx_iface.h" + +namespace vp9 { + +// TODO(angiebird): Merge this function with vpx_img_plane_width() +static int img_plane_width(const vpx_image_t *img, int plane) { + if (plane > 0 && img->x_chroma_shift > 0) + return (img->d_w + 1) >> img->x_chroma_shift; + else + return img->d_w; +} + +// TODO(angiebird): Merge this function with vpx_img_plane_height() +static int img_plane_height(const vpx_image_t *img, int plane) { + if (plane > 0 && img->y_chroma_shift > 0) + return (img->d_h + 1) >> img->y_chroma_shift; + else + return img->d_h; +} + +// TODO(angiebird): Merge this function with vpx_img_read() +static int img_read(vpx_image_t *img, FILE *file) { + int plane; + + for (plane = 0; plane < 3; ++plane) { + unsigned char *buf = img->planes[plane]; + const int stride = img->stride[plane]; + const int w = img_plane_width(img, plane) * + ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 
2 : 1); + const int h = img_plane_height(img, plane); + int y; + + for (y = 0; y < h; ++y) { + if (fread(buf, 1, w, file) != (size_t)w) return 0; + buf += stride; + } + } + + return 1; +} + +class SimpleEncode::EncodeImpl { + public: + VP9_COMP *cpi; + vpx_img_fmt_t img_fmt; + vpx_image_t tmp_img; + std::vector<FIRSTPASS_STATS> first_pass_stats; +}; + +static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf, + vpx_img_fmt_t img_fmt) { + VP9_COMP *cpi; + BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool)); + vp9_initialize_enc(); + cpi = vp9_create_compressor(oxcf, buffer_pool); + vp9_update_compressor_with_img_fmt(cpi, img_fmt); + return cpi; +} + +static void free_encoder(VP9_COMP *cpi) { + BufferPool *buffer_pool = cpi->common.buffer_pool; + vp9_remove_compressor(cpi); + // buffer_pool needs to be free after cpi because buffer_pool contains + // allocated buffers that will be free in vp9_remove_compressor() + vpx_free(buffer_pool); +} + +static INLINE vpx_rational_t make_vpx_rational(int num, int den) { + vpx_rational_t v; + v.num = num; + v.den = den; + return v; +} + +static INLINE FrameType +get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) { + // TODO(angiebird): Figure out if we need frame type other than key frame, + // alternate reference and inter frame + switch (update_type) { + case KF_UPDATE: return kKeyFrame; break; + case ARF_UPDATE: return kAlternateReference; break; + default: return kInterFrame; break; + } +} + +static void update_encode_frame_result( + EncodeFrameResult *encode_frame_result, + const ENCODE_FRAME_RESULT *encode_frame_info) { + encode_frame_result->coding_data_bit_size = + encode_frame_result->coding_data_byte_size * 8; + encode_frame_result->show_idx = encode_frame_info->show_idx; + encode_frame_result->frame_type = + get_frame_type_from_update_type(encode_frame_info->update_type); + encode_frame_result->psnr = encode_frame_info->psnr; + encode_frame_result->sse = encode_frame_info->sse; + 
encode_frame_result->quantize_index = encode_frame_info->quantize_index; +} + +SimpleEncode::SimpleEncode(int frame_width, int frame_height, + int frame_rate_num, int frame_rate_den, + int target_bitrate, int num_frames, + const char *infile_path) { + impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl()); + frame_width_ = frame_width; + frame_height_ = frame_height; + frame_rate_num_ = frame_rate_num; + frame_rate_den_ = frame_rate_den; + target_bitrate_ = target_bitrate; + num_frames_ = num_frames; + // TODO(angirbid): Should we keep a file pointer here or keep the file_path? + file_ = fopen(infile_path, "r"); + impl_ptr_->cpi = NULL; + impl_ptr_->img_fmt = VPX_IMG_FMT_I420; +} + +void SimpleEncode::ComputeFirstPassStats() { + vpx_rational_t frame_rate = + make_vpx_rational(frame_rate_num_, frame_rate_den_); + const VP9EncoderConfig oxcf = + vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, + target_bitrate_, VPX_RC_FIRST_PASS); + VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt); + struct lookahead_ctx *lookahead = cpi->lookahead; + int i; + int use_highbitdepth = 0; +#if CONFIG_VP9_HIGHBITDEPTH + use_highbitdepth = cpi->common.use_highbitdepth; +#endif + vpx_image_t img; + vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1); + rewind(file_); + impl_ptr_->first_pass_stats.clear(); + for (i = 0; i < num_frames_; ++i) { + assert(!vp9_lookahead_full(lookahead)); + if (img_read(&img, file_)) { + int next_show_idx = vp9_lookahead_next_show_idx(lookahead); + int64_t ts_start = + timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx); + int64_t ts_end = + timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1); + YV12_BUFFER_CONFIG sd; + image2yuvconfig(&img, &sd); + vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0); + { + int64_t time_stamp; + int64_t time_end; + int flush = 1; // Makes vp9_get_compressed_data process a frame + size_t size; + unsigned int frame_flags = 0; + 
ENCODE_FRAME_RESULT encode_frame_info; + // TODO(angiebird): Call vp9_first_pass directly + vp9_get_compressed_data(cpi, &frame_flags, &size, NULL, &time_stamp, + &time_end, flush, &encode_frame_info); + // vp9_get_compressed_data only generates first pass stats not + // compresses data + assert(size == 0); + } + impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass)); + } + } + vp9_end_first_pass(cpi); + // TODO(angiebird): Store the total_stats apart form first_pass_stats + impl_ptr_->first_pass_stats.push_back(vp9_get_total_stats(&cpi->twopass)); + free_encoder(cpi); + rewind(file_); + vpx_img_free(&img); +} + +std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() { + std::vector<std::vector<double>> output_stats; + // TODO(angiebird): This function make several assumptions of + // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the + // last one. 2) The last entry of first_pass_stats is the total_stats. + // Change the code structure, so that we don't have to make these assumptions + + // Note the last entry of first_pass_stats is the total_stats, we don't need + // it. 
+ for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) { + double *buf_start = + reinterpret_cast<double *>(&impl_ptr_->first_pass_stats[i]); + // We use - 1 here because the last member in FIRSTPASS_STATS is not double + double *buf_end = + buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) - + 1; + std::vector<double> this_stats(buf_start, buf_end); + output_stats.push_back(this_stats); + } + return output_stats; +} + +void SimpleEncode::StartEncode() { + assert(impl_ptr_->first_pass_stats.size() > 0); + vpx_rational_t frame_rate = + make_vpx_rational(frame_rate_num_, frame_rate_den_); + VP9EncoderConfig oxcf = + vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, + target_bitrate_, VPX_RC_LAST_PASS); + vpx_fixed_buf_t stats; + stats.buf = impl_ptr_->first_pass_stats.data(); + stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) * + impl_ptr_->first_pass_stats.size(); + + vp9_set_first_pass_stats(&oxcf, &stats); + assert(impl_ptr_->cpi == NULL); + impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt); + vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_, + frame_height_, 1); + rewind(file_); +} + +void SimpleEncode::EndEncode() { + free_encoder(impl_ptr_->cpi); + impl_ptr_->cpi = nullptr; + vpx_img_free(&impl_ptr_->tmp_img); + rewind(file_); +} + +int SimpleEncode::GetKeyFrameGroupSize(int key_frame_index) const { + const VP9_COMP *cpi = impl_ptr_->cpi; + return vp9_get_frames_to_next_key(&cpi->oxcf, &cpi->frame_info, + &cpi->twopass.first_pass_info, + key_frame_index, cpi->rc.min_gf_interval); +} + +void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) { + VP9_COMP *cpi = impl_ptr_->cpi; + struct lookahead_ctx *lookahead = cpi->lookahead; + int use_highbitdepth = 0; +#if CONFIG_VP9_HIGHBITDEPTH + use_highbitdepth = cpi->common.use_highbitdepth; +#endif + // The lookahead's size is set to oxcf->lag_in_frames. 
+ // We want to fill lookahead to it's max capacity if possible so that the + // encoder can construct alt ref frame in time. + // In the other words, we hope vp9_get_compressed_data to encode a frame + // every time in the function + while (!vp9_lookahead_full(lookahead)) { + // TODO(angiebird): Check whether we can move this file read logics to + // lookahead + if (img_read(&impl_ptr_->tmp_img, file_)) { + int next_show_idx = vp9_lookahead_next_show_idx(lookahead); + int64_t ts_start = + timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx); + int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, + next_show_idx + 1); + YV12_BUFFER_CONFIG sd; + image2yuvconfig(&impl_ptr_->tmp_img, &sd); + vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0); + } else { + break; + } + } + assert(encode_frame_result->coding_data.get() == nullptr); + const size_t max_coding_data_byte_size = frame_width_ * frame_height_ * 3; + encode_frame_result->coding_data = std::move( + std::unique_ptr<uint8_t[]>(new uint8_t[max_coding_data_byte_size])); + int64_t time_stamp; + int64_t time_end; + int flush = 1; // Make vp9_get_compressed_data encode a frame + unsigned int frame_flags = 0; + ENCODE_FRAME_RESULT encode_frame_info; + vp9_get_compressed_data(cpi, &frame_flags, + &encode_frame_result->coding_data_byte_size, + encode_frame_result->coding_data.get(), &time_stamp, + &time_end, flush, &encode_frame_info); + // vp9_get_compressed_data is expected to encode a frame every time, so the + // data size should be greater than zero. 
+ assert(encode_frame_result->coding_data_byte_size > 0); + assert(encode_frame_result->coding_data_byte_size < + max_coding_data_byte_size); + + update_encode_frame_result(encode_frame_result, &encode_frame_info); +} + +void SimpleEncode::EncodeFrameWithQuantizeIndex( + EncodeFrameResult *encode_frame_result, int quantize_index) { + encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command, + quantize_index); + EncodeFrame(encode_frame_result); + encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command); +} + +int SimpleEncode::GetCodingFrameNum() const { + assert(impl_ptr_->first_pass_stats.size() - 1 > 0); + // These are the default settings for now. + const int multi_layer_arf = 0; + const int allow_alt_ref = 1; + vpx_rational_t frame_rate = + make_vpx_rational(frame_rate_num_, frame_rate_den_); + const VP9EncoderConfig oxcf = + vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, + target_bitrate_, VPX_RC_LAST_PASS); + FRAME_INFO frame_info = vp9_get_frame_info(&oxcf); + FIRST_PASS_INFO first_pass_info; + fps_init_first_pass_info(&first_pass_info, impl_ptr_->first_pass_stats.data(), + num_frames_); + return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info, + multi_layer_arf, allow_alt_ref); +} + +SimpleEncode::~SimpleEncode() { + if (this->file_ != NULL) { + fclose(this->file_); + } +} + +} // namespace vp9 diff --git a/libvpx/vp9/simple_encode.h b/libvpx/vp9/simple_encode.h new file mode 100644 index 000000000..471b4e7a8 --- /dev/null +++ b/libvpx/vp9/simple_encode.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VP9_SIMPLE_ENCODE_H_ +#define VPX_VP9_SIMPLE_ENCODE_H_ + +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <memory> +#include <vector> + +namespace vp9 { + +enum FrameType { + kKeyFrame = 0, + kInterFrame, + kAlternateReference, +}; + +struct EncodeFrameResult { + int show_idx; + FrameType frame_type; + size_t coding_data_bit_size; + size_t coding_data_byte_size; + // The EncodeFrame will allocate a buffer, write the coding data into the + // buffer and give the ownership of the buffer to coding_data. + std::unique_ptr<unsigned char[]> coding_data; + double psnr; + uint64_t sse; + int quantize_index; +}; + +class SimpleEncode { + public: + SimpleEncode(int frame_width, int frame_height, int frame_rate_num, + int frame_rate_den, int target_bitrate, int num_frames, + const char *infile_path); + ~SimpleEncode(); + SimpleEncode(SimpleEncode &) = delete; + SimpleEncode &operator=(const SimpleEncode &) = delete; + + // Makes encoder compute the first pass stats and store it internally for + // future encode. + void ComputeFirstPassStats(); + + // Outputs the first pass stats represented by a 2-D vector. + // One can use the frame index at first dimension to retrieve the stats for + // each video frame. The stats of each video frame is a vector of 25 double + // values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h + std::vector<std::vector<double>> ObserveFirstPassStats(); + + // Initializes the encoder for actual encoding. + // This function should be called after ComputeFirstPassStats(). + void StartEncode(); + + // Frees the encoder. + // This function should be called after StartEncode() or EncodeFrame(). + void EndEncode(); + + // Given a key_frame_index, computes this key frame group's size. + // The key frame group size includes one key frame plus the number of + // following inter frames. Note that the key frame group size only counts the + // show frames. 
The number of no show frames like alternate references are not + // counted. + int GetKeyFrameGroupSize(int key_frame_index) const; + + // Encodes a frame + // This function should be called after StartEncode() and before EndEncode(). + void EncodeFrame(EncodeFrameResult *encode_frame_result); + + // Encodes a frame with a specific quantize index. + // This function should be called after StartEncode() and before EndEncode(). + void EncodeFrameWithQuantizeIndex(EncodeFrameResult *encode_frame_result, + int quantize_index); + + // Gets the number of coding frames for the video. The coding frames include + // show frame and no show frame. + // This function should be called after ComputeFirstPassStats(). + int GetCodingFrameNum() const; + + private: + class EncodeImpl; + + int frame_width_; + int frame_height_; + int frame_rate_num_; + int frame_rate_den_; + int target_bitrate_; + int num_frames_; + std::FILE *file_; + std::unique_ptr<EncodeImpl> impl_ptr_; +}; + +} // namespace vp9 + +#endif // VPX_VP9_SIMPLE_ENCODE_H_ diff --git a/libvpx/vp9/vp9_common.mk b/libvpx/vp9/vp9_common.mk index c9a55669e..5ef2f891a 100644 --- a/libvpx/vp9/vp9_common.mk +++ b/libvpx/vp9/vp9_common.mk @@ -10,6 +10,7 @@ VP9_COMMON_SRCS-yes += vp9_common.mk VP9_COMMON_SRCS-yes += vp9_iface_common.h +VP9_COMMON_SRCS-yes += vp9_iface_common.c VP9_COMMON_SRCS-yes += common/vp9_ppflags.h VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c VP9_COMMON_SRCS-yes += common/vp9_blockd.c diff --git a/libvpx/vp9/vp9_cx_iface.c b/libvpx/vp9/vp9_cx_iface.c index 45e03f2de..f415e50f7 100644 --- a/libvpx/vp9/vp9_cx_iface.c +++ b/libvpx/vp9/vp9_cx_iface.c @@ -13,6 +13,7 @@ #include "./vpx_config.h" #include "vpx/vpx_encoder.h" +#include "vpx_dsp/psnr.h" #include "vpx_ports/vpx_once.h" #include "vpx_ports/system_state.h" #include "vpx_util/vpx_timestamp.h" @@ -20,10 +21,14 @@ #include "./vpx_version.h" #include "vp9/encoder/vp9_encoder.h" #include "vpx/vp8cx.h" +#include "vp9/common/vp9_alloccommon.h" +#include 
"vp9/vp9_cx_iface.h" #include "vp9/encoder/vp9_firstpass.h" +#include "vp9/encoder/vp9_lookahead.h" +#include "vp9/vp9_cx_iface.h" #include "vp9/vp9_iface_common.h" -struct vp9_extracfg { +typedef struct vp9_extracfg { int cpu_used; // available cpu percentage in 1/16 unsigned int enable_auto_alt_ref; unsigned int noise_sensitivity; @@ -55,7 +60,7 @@ struct vp9_extracfg { int render_height; unsigned int row_mt; unsigned int motion_vector_unit_test; -}; +} vp9_extracfg; static struct vp9_extracfg default_extra_cfg = { 0, // cpu_used @@ -466,6 +471,15 @@ static void config_target_level(VP9EncoderConfig *oxcf) { } } +static vpx_rational64_t get_g_timebase_in_ts(vpx_rational_t g_timebase) { + vpx_rational64_t g_timebase_in_ts; + g_timebase_in_ts.den = g_timebase.den; + g_timebase_in_ts.num = g_timebase.num; + g_timebase_in_ts.num *= TICKS_PER_SEC; + reduce_ratio(&g_timebase_in_ts); + return g_timebase_in_ts; +} + static vpx_codec_err_t set_encoder_config( VP9EncoderConfig *oxcf, const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { @@ -477,9 +491,13 @@ static vpx_codec_err_t set_encoder_config( oxcf->height = cfg->g_h; oxcf->bit_depth = cfg->g_bit_depth; oxcf->input_bit_depth = cfg->g_input_bit_depth; + // TODO(angiebird): Figure out if we can just use g_timebase to indicate the + // inverse of framerate // guess a frame rate if out of whack, use 30 oxcf->init_framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num; if (oxcf->init_framerate > 180) oxcf->init_framerate = 30; + oxcf->g_timebase = cfg->g_timebase; + oxcf->g_timebase_in_ts = get_g_timebase_in_ts(oxcf->g_timebase); oxcf->mode = GOOD; @@ -539,10 +557,16 @@ static vpx_codec_err_t set_encoder_config( oxcf->speed = abs(extra_cfg->cpu_used); oxcf->encode_breakout = extra_cfg->static_thresh; oxcf->enable_auto_arf = extra_cfg->enable_auto_alt_ref; - oxcf->noise_sensitivity = extra_cfg->noise_sensitivity; + if (oxcf->bit_depth == VPX_BITS_8) { + oxcf->noise_sensitivity = 
extra_cfg->noise_sensitivity; + } else { + // Disable denoiser for high bitdepth since vp9_denoiser_filter only works + // for 8 bits. + oxcf->noise_sensitivity = 0; + } oxcf->sharpness = extra_cfg->sharpness; - oxcf->two_pass_stats_in = cfg->rc_twopass_stats_in; + vp9_set_first_pass_stats(oxcf, &cfg->rc_twopass_stats_in); #if CONFIG_FP_MB_STATS oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in; @@ -611,40 +635,7 @@ static vpx_codec_err_t set_encoder_config( } if (get_level_index(oxcf->target_level) >= 0) config_target_level(oxcf); - /* - printf("Current VP9 Settings: \n"); - printf("target_bandwidth: %d\n", oxcf->target_bandwidth); - printf("target_level: %d\n", oxcf->target_level); - printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity); - printf("sharpness: %d\n", oxcf->sharpness); - printf("cpu_used: %d\n", oxcf->cpu_used); - printf("Mode: %d\n", oxcf->mode); - printf("auto_key: %d\n", oxcf->auto_key); - printf("key_freq: %d\n", oxcf->key_freq); - printf("end_usage: %d\n", oxcf->end_usage); - printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct); - printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct); - printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level); - printf("optimal_buffer_level: %d\n", oxcf->optimal_buffer_level); - printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size); - printf("fixed_q: %d\n", oxcf->fixed_q); - printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q); - printf("best_allowed_q: %d\n", oxcf->best_allowed_q); - printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling); - printf("scaled_frame_width: %d\n", oxcf->scaled_frame_width); - printf("scaled_frame_height: %d\n", oxcf->scaled_frame_height); - printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias); - printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section); - printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section); - printf("vbr_corpus_complexity: %d\n", oxcf->vbr_corpus_complexity); - 
printf("lag_in_frames: %d\n", oxcf->lag_in_frames); - printf("enable_auto_arf: %d\n", oxcf->enable_auto_arf); - printf("Version: %d\n", oxcf->Version); - printf("encode_breakout: %d\n", oxcf->encode_breakout); - printf("error resilient: %d\n", oxcf->error_resilient_mode); - printf("frame parallel detokenization: %d\n", - oxcf->frame_parallel_decoding_mode); - */ + // vp9_dump_encoder_config(oxcf); return VPX_CODEC_OK; } @@ -935,10 +926,9 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, if (res == VPX_CODEC_OK) { priv->pts_offset_initialized = 0; - priv->timestamp_ratio.den = priv->cfg.g_timebase.den; - priv->timestamp_ratio.num = (int64_t)priv->cfg.g_timebase.num; - priv->timestamp_ratio.num *= TICKS_PER_SEC; - reduce_ratio(&priv->timestamp_ratio); + // TODO(angiebird): Replace priv->timestamp_ratio by + // oxcf->g_timebase_in_ts + priv->timestamp_ratio = get_g_timebase_in_ts(priv->cfg.g_timebase); set_encoder_config(&priv->oxcf, &priv->cfg, &priv->extra_cfg); #if CONFIG_VP9_HIGHBITDEPTH @@ -946,10 +936,7 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, (ctx->init_flags & VPX_CODEC_USE_HIGHBITDEPTH) ? 
1 : 0; #endif priv->cpi = vp9_create_compressor(&priv->oxcf, priv->buffer_pool); - if (priv->cpi == NULL) - res = VPX_CODEC_MEM_ERROR; - else - priv->cpi->output_pkt_list = &priv->pkt_list.head; + if (priv->cpi == NULL) res = VPX_CODEC_MEM_ERROR; } } @@ -1067,18 +1054,6 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { return index_sz; } -static int64_t timebase_units_to_ticks(const vpx_rational64_t *timestamp_ratio, - int64_t n) { - return n * timestamp_ratio->num / timestamp_ratio->den; -} - -static int64_t ticks_to_timebase_units(const vpx_rational64_t *timestamp_ratio, - int64_t n) { - int64_t round = timestamp_ratio->num / 2; - if (round > 0) --round; - return (n * timestamp_ratio->den + round) / timestamp_ratio->num; -} - static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi, unsigned int lib_flags) { vpx_codec_frame_flags_t flags = lib_flags << 16; @@ -1096,6 +1071,27 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi, return flags; } +static INLINE vpx_codec_cx_pkt_t get_psnr_pkt(const PSNR_STATS *psnr) { + vpx_codec_cx_pkt_t pkt; + pkt.kind = VPX_CODEC_PSNR_PKT; + pkt.data.psnr = *psnr; + return pkt; +} + +#if !CONFIG_REALTIME_ONLY +static INLINE vpx_codec_cx_pkt_t +get_first_pass_stats_pkt(FIRSTPASS_STATS *stats) { + // WARNING: This function assumes that stats will + // exist and not be changed until the packet is processed + // TODO(angiebird): Refactor the code to avoid using the assumption. 
+ vpx_codec_cx_pkt_t pkt; + pkt.kind = VPX_CODEC_STATS_PKT; + pkt.data.twopass_stats.buf = stats; + pkt.data.twopass_stats.sz = sizeof(*stats); + return pkt; +} +#endif + const size_t kMinCompressedSize = 8192; static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img, @@ -1109,19 +1105,11 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, VP9_COMP *const cpi = ctx->cpi; const vpx_rational64_t *const timestamp_ratio = &ctx->timestamp_ratio; size_t data_sz; + vpx_codec_cx_pkt_t pkt; + memset(&pkt, 0, sizeof(pkt)); if (cpi == NULL) return VPX_CODEC_INVALID_PARAM; - if (cpi->oxcf.pass == 2 && cpi->level_constraint.level_index >= 0 && - !cpi->level_constraint.rc_config_updated) { - const VP9EncoderConfig *const oxcf = &cpi->oxcf; - TWO_PASS *const twopass = &cpi->twopass; - FIRSTPASS_STATS *stats = &twopass->total_stats; - twopass->bits_left = - (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0); - cpi->level_constraint.rc_config_updated = 1; - } - if (img != NULL) { res = validate_img(ctx, img); if (res == VPX_CODEC_OK) { @@ -1223,92 +1211,135 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, } } - while (cx_data_sz >= ctx->cx_data_sz / 2 && - -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data, - &dst_time_stamp, &dst_end_time_stamp, - !img)) { - if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) { - vpx_codec_cx_pkt_t pkt; - - // Pack invisible frames with the next visible frame - if (!cpi->common.show_frame || - (cpi->use_svc && - cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)) { - if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; - ctx->pending_cx_data_sz += size; - if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; - ctx->pending_frame_magnitude |= size; - cx_data += size; - cx_data_sz -= size; + if (cpi->oxcf.pass == 1 && !cpi->use_svc) { +#if !CONFIG_REALTIME_ONLY + // compute first pass stats + if (img) { + int 
ret; + ENCODE_FRAME_RESULT encode_frame_result; + vpx_codec_cx_pkt_t fps_pkt; + // TODO(angiebird): Call vp9_first_pass directly + ret = vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data, + &dst_time_stamp, &dst_end_time_stamp, + !img, &encode_frame_result); + assert(size == 0); // There is no compressed data in the first pass + (void)ret; + assert(ret == 0); + fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.this_frame_stats); + vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt); + } else { + if (!cpi->twopass.first_pass_done) { + vpx_codec_cx_pkt_t fps_pkt; + vp9_end_first_pass(cpi); + fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.total_stats); + vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt); + } + } +#else // !CONFIG_REALTIME_ONLY + assert(0); +#endif // !CONFIG_REALTIME_ONLY + } else { + ENCODE_FRAME_RESULT encode_frame_result; + while (cx_data_sz >= ctx->cx_data_sz / 2 && + -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data, + &dst_time_stamp, &dst_end_time_stamp, + !img, &encode_frame_result)) { + // Pack psnr pkt + if (size > 0 && !cpi->use_svc) { + // TODO(angiebird): Figure out why we don't need psnr pkt when + // use_svc is on + PSNR_STATS psnr; + if (vp9_get_psnr(cpi, &psnr)) { + vpx_codec_cx_pkt_t psnr_pkt = get_psnr_pkt(&psnr); + vpx_codec_pkt_list_add(&ctx->pkt_list.head, &psnr_pkt); + } + } + + if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) { + // Pack invisible frames with the next visible frame + if (!cpi->common.show_frame || + (cpi->use_svc && cpi->svc.spatial_layer_id < + cpi->svc.number_spatial_layers - 1)) { + if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; + ctx->pending_cx_data_sz += size; + if (size) + ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; + ctx->pending_frame_magnitude |= size; + cx_data += size; + cx_data_sz -= size; + pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width; + pkt.data.frame.height[cpi->svc.spatial_layer_id] = + 
cpi->common.height; + pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] = + 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id]; + + if (ctx->output_cx_pkt_cb.output_cx_pkt) { + pkt.kind = VPX_CODEC_CX_FRAME_PKT; + pkt.data.frame.pts = + ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) + + ctx->pts_offset; + pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( + timestamp_ratio, dst_end_time_stamp - dst_time_stamp); + pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); + pkt.data.frame.buf = ctx->pending_cx_data; + pkt.data.frame.sz = size; + ctx->pending_cx_data = NULL; + ctx->pending_cx_data_sz = 0; + ctx->pending_frame_count = 0; + ctx->pending_frame_magnitude = 0; + ctx->output_cx_pkt_cb.output_cx_pkt( + &pkt, ctx->output_cx_pkt_cb.user_priv); + } + continue; + } + + // Add the frame packet to the list of returned packets. + pkt.kind = VPX_CODEC_CX_FRAME_PKT; + pkt.data.frame.pts = + ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) + + ctx->pts_offset; + pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( + timestamp_ratio, dst_end_time_stamp - dst_time_stamp); + pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width; pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height; pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] = 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id]; - if (ctx->output_cx_pkt_cb.output_cx_pkt) { - pkt.kind = VPX_CODEC_CX_FRAME_PKT; - pkt.data.frame.pts = - ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) + - ctx->pts_offset; - pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( - timestamp_ratio, dst_end_time_stamp - dst_time_stamp); - pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); + if (ctx->pending_cx_data) { + if (size) + ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; + ctx->pending_frame_magnitude |= size; 
+ ctx->pending_cx_data_sz += size; + // write the superframe only for the case when + if (!ctx->output_cx_pkt_cb.output_cx_pkt) + size += write_superframe_index(ctx); pkt.data.frame.buf = ctx->pending_cx_data; - pkt.data.frame.sz = size; + pkt.data.frame.sz = ctx->pending_cx_data_sz; ctx->pending_cx_data = NULL; ctx->pending_cx_data_sz = 0; ctx->pending_frame_count = 0; ctx->pending_frame_magnitude = 0; + } else { + pkt.data.frame.buf = cx_data; + pkt.data.frame.sz = size; + } + pkt.data.frame.partition_id = -1; + + if (ctx->output_cx_pkt_cb.output_cx_pkt) ctx->output_cx_pkt_cb.output_cx_pkt( &pkt, ctx->output_cx_pkt_cb.user_priv); - } - continue; - } + else + vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); - // Add the frame packet to the list of returned packets. - pkt.kind = VPX_CODEC_CX_FRAME_PKT; - pkt.data.frame.pts = - ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) + - ctx->pts_offset; - pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( - timestamp_ratio, dst_end_time_stamp - dst_time_stamp); - pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); - pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width; - pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height; - pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] = - 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id]; - - if (ctx->pending_cx_data) { - if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; - ctx->pending_frame_magnitude |= size; - ctx->pending_cx_data_sz += size; - // write the superframe only for the case when - if (!ctx->output_cx_pkt_cb.output_cx_pkt) - size += write_superframe_index(ctx); - pkt.data.frame.buf = ctx->pending_cx_data; - pkt.data.frame.sz = ctx->pending_cx_data_sz; - ctx->pending_cx_data = NULL; - ctx->pending_cx_data_sz = 0; - ctx->pending_frame_count = 0; - ctx->pending_frame_magnitude = 0; - } else { - pkt.data.frame.buf = cx_data; - pkt.data.frame.sz = size; - } - 
pkt.data.frame.partition_id = -1; - - if (ctx->output_cx_pkt_cb.output_cx_pkt) - ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, - ctx->output_cx_pkt_cb.user_priv); - else - vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); - - cx_data += size; - cx_data_sz -= size; - if (is_one_pass_cbr_svc(cpi) && - (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { - // Encoded all spatial layers; exit loop. - break; + cx_data += size; + cx_data_sz -= size; + if (is_one_pass_cbr_svc(cpi) && + (cpi->svc.spatial_layer_id == + cpi->svc.number_spatial_layers - 1)) { + // Encoded all spatial layers; exit loop. + break; + } } } } @@ -1765,7 +1796,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { VPX_VBR, // rc_end_usage { NULL, 0 }, // rc_twopass_stats_in { NULL, 0 }, // rc_firstpass_mb_stats_in - 256, // rc_target_bandwidth + 256, // rc_target_bitrate 0, // rc_min_quantizer 63, // rc_max_quantizer 25, // rc_undershoot_pct @@ -1831,3 +1862,222 @@ CODEC_INTERFACE(vpx_codec_vp9_cx) = { NULL // vpx_codec_enc_mr_get_mem_loc_fn_t } }; + +static vpx_codec_enc_cfg_t get_enc_cfg(int frame_width, int frame_height, + vpx_rational_t frame_rate, + int target_bitrate, + vpx_enc_pass enc_pass) { + vpx_codec_enc_cfg_t enc_cfg = encoder_usage_cfg_map[0].cfg; + enc_cfg.g_w = frame_width; + enc_cfg.g_h = frame_height; + enc_cfg.rc_target_bitrate = target_bitrate; + enc_cfg.g_pass = enc_pass; + // g_timebase is the inverse of frame_rate + enc_cfg.g_timebase.num = frame_rate.den; + enc_cfg.g_timebase.den = frame_rate.num; + return enc_cfg; +} + +static vp9_extracfg get_extra_cfg() { + vp9_extracfg extra_cfg = default_extra_cfg; + return extra_cfg; +} + +VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height, + vpx_rational_t frame_rate, + int target_bitrate, + vpx_enc_pass enc_pass) { + /* This function will generate the same VP9EncoderConfig used by the + * vpxenc command given below. 
+ * The configs in the vpxenc command corresponds to parameters of + * vp9_get_encoder_config() as follows. + * + * WIDTH: frame_width + * HEIGHT: frame_height + * FPS: frame_rate + * BITRATE: target_bitrate + * + * INPUT, OUTPUT, LIMIT will not affect VP9EncoderConfig + * + * vpxenc command: + * INPUT=bus_cif.y4m + * OUTPUT=output.webm + * WIDTH=352 + * HEIGHT=288 + * BITRATE=600 + * FPS=30/1 + * LIMIT=150 + * ./vpxenc --limit=$LIMIT --width=$WIDTH --height=$HEIGHT --fps=$FPS + * --lag-in-frames=25 \ + * --codec=vp9 --good --cpu-used=0 --threads=0 --profile=0 \ + * --min-q=0 --max-q=63 --auto-alt-ref=1 --passes=2 --kf-max-dist=150 \ + * --kf-min-dist=0 --drop-frame=0 --static-thresh=0 --bias-pct=50 \ + * --minsection-pct=0 --maxsection-pct=150 --arnr-maxframes=7 --psnr \ + * --arnr-strength=5 --sharpness=0 --undershoot-pct=100 --overshoot-pct=100 \ + * --frame-parallel=0 --tile-columns=0 --cpu-used=0 --end-usage=vbr \ + * --target-bitrate=$BITRATE -o $OUTPUT $INPUT + */ + + VP9EncoderConfig oxcf; + vp9_extracfg extra_cfg = get_extra_cfg(); + vpx_codec_enc_cfg_t enc_cfg = get_enc_cfg( + frame_width, frame_height, frame_rate, target_bitrate, enc_pass); + set_encoder_config(&oxcf, &enc_cfg, &extra_cfg); + + // These settings are made to match the settings of the vpxenc command. 
+ oxcf.key_freq = 150; + oxcf.under_shoot_pct = 100; + oxcf.over_shoot_pct = 100; + oxcf.max_threads = 0; + oxcf.tile_columns = 0; + oxcf.frame_parallel_decoding_mode = 0; + oxcf.two_pass_vbrmax_section = 150; + return oxcf; +} + +#define DUMP_STRUCT_VALUE(struct, value) \ + printf(#value " %" PRId64 "\n", (int64_t)(struct)->value) + +void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf) { + DUMP_STRUCT_VALUE(oxcf, profile); + DUMP_STRUCT_VALUE(oxcf, bit_depth); + DUMP_STRUCT_VALUE(oxcf, width); + DUMP_STRUCT_VALUE(oxcf, height); + DUMP_STRUCT_VALUE(oxcf, input_bit_depth); + DUMP_STRUCT_VALUE(oxcf, init_framerate); + // TODO(angiebird): dump g_timebase + // TODO(angiebird): dump g_timebase_in_ts + + DUMP_STRUCT_VALUE(oxcf, target_bandwidth); + + DUMP_STRUCT_VALUE(oxcf, noise_sensitivity); + DUMP_STRUCT_VALUE(oxcf, sharpness); + DUMP_STRUCT_VALUE(oxcf, speed); + DUMP_STRUCT_VALUE(oxcf, rc_max_intra_bitrate_pct); + DUMP_STRUCT_VALUE(oxcf, rc_max_inter_bitrate_pct); + DUMP_STRUCT_VALUE(oxcf, gf_cbr_boost_pct); + + DUMP_STRUCT_VALUE(oxcf, mode); + DUMP_STRUCT_VALUE(oxcf, pass); + + // Key Framing Operations + DUMP_STRUCT_VALUE(oxcf, auto_key); + DUMP_STRUCT_VALUE(oxcf, key_freq); + + DUMP_STRUCT_VALUE(oxcf, lag_in_frames); + + // ---------------------------------------------------------------- + // DATARATE CONTROL OPTIONS + + // vbr, cbr, constrained quality or constant quality + DUMP_STRUCT_VALUE(oxcf, rc_mode); + + // buffer targeting aggressiveness + DUMP_STRUCT_VALUE(oxcf, under_shoot_pct); + DUMP_STRUCT_VALUE(oxcf, over_shoot_pct); + + // buffering parameters + // TODO(angiebird): dump starting_buffer_level_ms + // TODO(angiebird): dump optimal_buffer_level_ms + // TODO(angiebird): dump maximum_buffer_size_ms + + // Frame drop threshold. 
+ DUMP_STRUCT_VALUE(oxcf, drop_frames_water_mark); + + // controlling quality + DUMP_STRUCT_VALUE(oxcf, fixed_q); + DUMP_STRUCT_VALUE(oxcf, worst_allowed_q); + DUMP_STRUCT_VALUE(oxcf, best_allowed_q); + DUMP_STRUCT_VALUE(oxcf, cq_level); + DUMP_STRUCT_VALUE(oxcf, aq_mode); + + // Special handling of Adaptive Quantization for AltRef frames + DUMP_STRUCT_VALUE(oxcf, alt_ref_aq); + + // Internal frame size scaling. + DUMP_STRUCT_VALUE(oxcf, resize_mode); + DUMP_STRUCT_VALUE(oxcf, scaled_frame_width); + DUMP_STRUCT_VALUE(oxcf, scaled_frame_height); + + // Enable feature to reduce the frame quantization every x frames. + DUMP_STRUCT_VALUE(oxcf, frame_periodic_boost); + + // two pass datarate control + DUMP_STRUCT_VALUE(oxcf, two_pass_vbrbias); + DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmin_section); + DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmax_section); + DUMP_STRUCT_VALUE(oxcf, vbr_corpus_complexity); + // END DATARATE CONTROL OPTIONS + // ---------------------------------------------------------------- + + // Spatial and temporal scalability. + DUMP_STRUCT_VALUE(oxcf, ss_number_layers); + DUMP_STRUCT_VALUE(oxcf, ts_number_layers); + + // Bitrate allocation for spatial layers. 
+ // TODO(angiebird): dump layer_target_bitrate[VPX_MAX_LAYERS] + // TODO(angiebird): dump ss_target_bitrate[VPX_SS_MAX_LAYERS] + // TODO(angiebird): dump ss_enable_auto_arf[VPX_SS_MAX_LAYERS] + // TODO(angiebird): dump ts_rate_decimator[VPX_TS_MAX_LAYERS] + + DUMP_STRUCT_VALUE(oxcf, enable_auto_arf); + DUMP_STRUCT_VALUE(oxcf, encode_breakout); + DUMP_STRUCT_VALUE(oxcf, error_resilient_mode); + DUMP_STRUCT_VALUE(oxcf, frame_parallel_decoding_mode); + + DUMP_STRUCT_VALUE(oxcf, arnr_max_frames); + DUMP_STRUCT_VALUE(oxcf, arnr_strength); + + DUMP_STRUCT_VALUE(oxcf, min_gf_interval); + DUMP_STRUCT_VALUE(oxcf, max_gf_interval); + + DUMP_STRUCT_VALUE(oxcf, tile_columns); + DUMP_STRUCT_VALUE(oxcf, tile_rows); + + DUMP_STRUCT_VALUE(oxcf, enable_tpl_model); + + DUMP_STRUCT_VALUE(oxcf, max_threads); + + DUMP_STRUCT_VALUE(oxcf, target_level); + + // TODO(angiebird): dump two_pass_stats_in + +#if CONFIG_FP_MB_STATS + // TODO(angiebird): dump firstpass_mb_stats_in +#endif + + DUMP_STRUCT_VALUE(oxcf, tuning); + DUMP_STRUCT_VALUE(oxcf, content); +#if CONFIG_VP9_HIGHBITDEPTH + DUMP_STRUCT_VALUE(oxcf, use_highbitdepth); +#endif + DUMP_STRUCT_VALUE(oxcf, color_space); + DUMP_STRUCT_VALUE(oxcf, color_range); + DUMP_STRUCT_VALUE(oxcf, render_width); + DUMP_STRUCT_VALUE(oxcf, render_height); + DUMP_STRUCT_VALUE(oxcf, temporal_layering_mode); + + DUMP_STRUCT_VALUE(oxcf, row_mt); + DUMP_STRUCT_VALUE(oxcf, motion_vector_unit_test); +} + +FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf) { + FRAME_INFO frame_info; + int dummy; + frame_info.frame_width = oxcf->width; + frame_info.frame_height = oxcf->height; + frame_info.render_frame_width = oxcf->width; + frame_info.render_frame_height = oxcf->height; + frame_info.bit_depth = oxcf->bit_depth; + vp9_set_mi_size(&frame_info.mi_rows, &frame_info.mi_cols, &dummy, + frame_info.frame_width, frame_info.frame_height); + vp9_set_mb_size(&frame_info.mb_rows, &frame_info.mb_cols, &frame_info.num_mbs, + frame_info.mi_rows, 
frame_info.mi_cols); + // TODO(angiebird): Figure out how to get subsampling_x/y here + return frame_info; +} + +void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf, + const vpx_fixed_buf_t *stats) { + oxcf->two_pass_stats_in = *stats; +} diff --git a/libvpx/vp9/vp9_cx_iface.h b/libvpx/vp9/vp9_cx_iface.h new file mode 100644 index 000000000..08569fcc9 --- /dev/null +++ b/libvpx/vp9/vp9_cx_iface.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_VP9_CX_IFACE_H_ +#define VPX_VP9_VP9_CX_IFACE_H_ +#include "vp9/encoder/vp9_encoder.h" +#include "vp9/common/vp9_onyxc_int.h" + +#ifdef __cplusplus +extern "C" { +#endif + +VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height, + vpx_rational_t frame_rate, + int target_bitrate, + vpx_enc_pass enc_pass); + +void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf); + +FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf); + +static INLINE int64_t +timebase_units_to_ticks(const vpx_rational64_t *timestamp_ratio, int64_t n) { + return n * timestamp_ratio->num / timestamp_ratio->den; +} + +static INLINE int64_t +ticks_to_timebase_units(const vpx_rational64_t *timestamp_ratio, int64_t n) { + int64_t round = timestamp_ratio->num / 2; + if (round > 0) --round; + return (n * timestamp_ratio->den + round) / timestamp_ratio->num; +} + +void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf, + const vpx_fixed_buf_t *stats); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_VP9_CX_IFACE_H_ diff --git a/libvpx/vp9/vp9_iface_common.c b/libvpx/vp9/vp9_iface_common.c new file mode 100644 index 
000000000..74d08a587 --- /dev/null +++ b/libvpx/vp9/vp9_iface_common.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file in the root of the source tree. An additional + * intellectual property rights grant can be found in the file PATENTS. + * All contributing project authors may be found in the AUTHORS file in + * the root of the source tree. + */ + +#include "vp9/vp9_iface_common.h" +void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, + void *user_priv) { + /** vpx_img_wrap() doesn't allow specifying independent strides for + * the Y, U, and V planes, nor other alignment adjustments that + * might be representable by a YV12_BUFFER_CONFIG, so we just + * initialize all the fields.*/ + int bps; + if (!yv12->subsampling_y) { + if (!yv12->subsampling_x) { + img->fmt = VPX_IMG_FMT_I444; + bps = 24; + } else { + img->fmt = VPX_IMG_FMT_I422; + bps = 16; + } + } else { + if (!yv12->subsampling_x) { + img->fmt = VPX_IMG_FMT_I440; + bps = 16; + } else { + img->fmt = VPX_IMG_FMT_I420; + bps = 12; + } + } + img->cs = yv12->color_space; + img->range = yv12->color_range; + img->bit_depth = 8; + img->w = yv12->y_stride; + img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); + img->d_w = yv12->y_crop_width; + img->d_h = yv12->y_crop_height; + img->r_w = yv12->render_width; + img->r_h = yv12->render_height; + img->x_chroma_shift = yv12->subsampling_x; + img->y_chroma_shift = yv12->subsampling_y; + img->planes[VPX_PLANE_Y] = yv12->y_buffer; + img->planes[VPX_PLANE_U] = yv12->u_buffer; + img->planes[VPX_PLANE_V] = yv12->v_buffer; + img->planes[VPX_PLANE_ALPHA] = NULL; + img->stride[VPX_PLANE_Y] = yv12->y_stride; + img->stride[VPX_PLANE_U] = yv12->uv_stride; + img->stride[VPX_PLANE_V] = yv12->uv_stride; + img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; +#if CONFIG_VP9_HIGHBITDEPTH + if 
(yv12->flags & YV12_FLAG_HIGHBITDEPTH) { + // vpx_image_t uses byte strides and a pointer to the first byte + // of the image. + img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH); + img->bit_depth = yv12->bit_depth; + img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer); + img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer); + img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer); + img->planes[VPX_PLANE_ALPHA] = NULL; + img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride; + img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride; + img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride; + img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + img->bps = bps; + img->user_priv = user_priv; + img->img_data = yv12->buffer_alloc; + img->img_data_owner = 0; + img->self_allocd = 0; +} + +vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, + YV12_BUFFER_CONFIG *yv12) { + yv12->y_buffer = img->planes[VPX_PLANE_Y]; + yv12->u_buffer = img->planes[VPX_PLANE_U]; + yv12->v_buffer = img->planes[VPX_PLANE_V]; + + yv12->y_crop_width = img->d_w; + yv12->y_crop_height = img->d_h; + yv12->render_width = img->r_w; + yv12->render_height = img->r_h; + yv12->y_width = img->d_w; + yv12->y_height = img->d_h; + + yv12->uv_width = + img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width; + yv12->uv_height = + img->y_chroma_shift == 1 ? 
(1 + yv12->y_height) / 2 : yv12->y_height; + yv12->uv_crop_width = yv12->uv_width; + yv12->uv_crop_height = yv12->uv_height; + + yv12->y_stride = img->stride[VPX_PLANE_Y]; + yv12->uv_stride = img->stride[VPX_PLANE_U]; + yv12->color_space = img->cs; + yv12->color_range = img->range; + +#if CONFIG_VP9_HIGHBITDEPTH + if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + // In vpx_image_t + // planes point to uint8 address of start of data + // stride counts uint8s to reach next row + // In YV12_BUFFER_CONFIG + // y_buffer, u_buffer, v_buffer point to uint16 address of data + // stride and border counts in uint16s + // This means that all the address calculations in the main body of code + // should work correctly. + // However, before we do any pixel operations we need to cast the address + // to a uint16 pointer and double its value. + yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer); + yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer); + yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer); + yv12->y_stride >>= 1; + yv12->uv_stride >>= 1; + yv12->flags = YV12_FLAG_HIGHBITDEPTH; + } else { + yv12->flags = 0; + } + yv12->border = (yv12->y_stride - img->w) / 2; +#else + yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; +#endif // CONFIG_VP9_HIGHBITDEPTH + yv12->subsampling_x = img->x_chroma_shift; + yv12->subsampling_y = img->y_chroma_shift; + return VPX_CODEC_OK; +} diff --git a/libvpx/vp9/vp9_iface_common.h b/libvpx/vp9/vp9_iface_common.h index a1921db63..e646917c6 100644 --- a/libvpx/vp9/vp9_iface_common.h +++ b/libvpx/vp9/vp9_iface_common.h @@ -10,130 +10,24 @@ #ifndef VPX_VP9_VP9_IFACE_COMMON_H_ #define VPX_VP9_VP9_IFACE_COMMON_H_ +#include <assert.h> #include "vpx_ports/mem.h" +#include "vpx/vp8.h" +#include "vpx_scale/yv12config.h" +#include "common/vp9_enums.h" -static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, - void *user_priv) { - /** vpx_img_wrap() doesn't allow specifying independent strides for - * the Y, U, and V planes, nor 
other alignment adjustments that - * might be representable by a YV12_BUFFER_CONFIG, so we just - * initialize all the fields.*/ - int bps; - if (!yv12->subsampling_y) { - if (!yv12->subsampling_x) { - img->fmt = VPX_IMG_FMT_I444; - bps = 24; - } else { - img->fmt = VPX_IMG_FMT_I422; - bps = 16; - } - } else { - if (!yv12->subsampling_x) { - img->fmt = VPX_IMG_FMT_I440; - bps = 16; - } else { - img->fmt = VPX_IMG_FMT_I420; - bps = 12; - } - } - img->cs = yv12->color_space; - img->range = yv12->color_range; - img->bit_depth = 8; - img->w = yv12->y_stride; - img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); - img->d_w = yv12->y_crop_width; - img->d_h = yv12->y_crop_height; - img->r_w = yv12->render_width; - img->r_h = yv12->render_height; - img->x_chroma_shift = yv12->subsampling_x; - img->y_chroma_shift = yv12->subsampling_y; - img->planes[VPX_PLANE_Y] = yv12->y_buffer; - img->planes[VPX_PLANE_U] = yv12->u_buffer; - img->planes[VPX_PLANE_V] = yv12->v_buffer; - img->planes[VPX_PLANE_ALPHA] = NULL; - img->stride[VPX_PLANE_Y] = yv12->y_stride; - img->stride[VPX_PLANE_U] = yv12->uv_stride; - img->stride[VPX_PLANE_V] = yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; -#if CONFIG_VP9_HIGHBITDEPTH - if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) { - // vpx_image_t uses byte strides and a pointer to the first byte - // of the image. 
- img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH); - img->bit_depth = yv12->bit_depth; - img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer); - img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer); - img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer); - img->planes[VPX_PLANE_ALPHA] = NULL; - img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride; - img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride; - img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride; - } -#endif // CONFIG_VP9_HIGHBITDEPTH - img->bps = bps; - img->user_priv = user_priv; - img->img_data = yv12->buffer_alloc; - img->img_data_owner = 0; - img->self_allocd = 0; -} - -static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, - YV12_BUFFER_CONFIG *yv12) { - yv12->y_buffer = img->planes[VPX_PLANE_Y]; - yv12->u_buffer = img->planes[VPX_PLANE_U]; - yv12->v_buffer = img->planes[VPX_PLANE_V]; - - yv12->y_crop_width = img->d_w; - yv12->y_crop_height = img->d_h; - yv12->render_width = img->r_w; - yv12->render_height = img->r_h; - yv12->y_width = img->d_w; - yv12->y_height = img->d_h; +#ifdef __cplusplus +extern "C" { +#endif - yv12->uv_width = - img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width; - yv12->uv_height = - img->y_chroma_shift == 1 ? 
(1 + yv12->y_height) / 2 : yv12->y_height; - yv12->uv_crop_width = yv12->uv_width; - yv12->uv_crop_height = yv12->uv_height; +void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, + void *user_priv); - yv12->y_stride = img->stride[VPX_PLANE_Y]; - yv12->uv_stride = img->stride[VPX_PLANE_U]; - yv12->color_space = img->cs; - yv12->color_range = img->range; +vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, + YV12_BUFFER_CONFIG *yv12); -#if CONFIG_VP9_HIGHBITDEPTH - if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { - // In vpx_image_t - // planes point to uint8 address of start of data - // stride counts uint8s to reach next row - // In YV12_BUFFER_CONFIG - // y_buffer, u_buffer, v_buffer point to uint16 address of data - // stride and border counts in uint16s - // This means that all the address calculations in the main body of code - // should work correctly. - // However, before we do any pixel operations we need to cast the address - // to a uint16 ponter and double its value. 
- yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer); - yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer); - yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer); - yv12->y_stride >>= 1; - yv12->uv_stride >>= 1; - yv12->flags = YV12_FLAG_HIGHBITDEPTH; - } else { - yv12->flags = 0; - } - yv12->border = (yv12->y_stride - img->w) / 2; -#else - yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; -#endif // CONFIG_VP9_HIGHBITDEPTH - yv12->subsampling_x = img->x_chroma_shift; - yv12->subsampling_y = img->y_chroma_shift; - return VPX_CODEC_OK; -} - -static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { +static INLINE VP9_REFFRAME +ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { switch (frame) { case VP8_LAST_FRAME: return VP9_LAST_FLAG; case VP8_GOLD_FRAME: return VP9_GOLD_FLAG; @@ -142,4 +36,9 @@ static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { assert(0 && "Invalid Reference Frame"); return VP9_LAST_FLAG; } + +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VPX_VP9_VP9_IFACE_COMMON_H_ diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk index 736ff0170..ad774505c 100644 --- a/libvpx/vp9/vp9cx.mk +++ b/libvpx/vp9/vp9cx.mk @@ -16,6 +16,10 @@ VP9_CX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes) VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) VP9_CX_SRCS-yes += vp9_cx_iface.c +VP9_CX_SRCS-yes += vp9_cx_iface.h + +VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.cc +VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.h VP9_CX_SRCS-yes += encoder/vp9_bitstream.c VP9_CX_SRCS-yes += encoder/vp9_context_tree.c @@ -76,6 +80,8 @@ VP9_CX_SRCS-yes += encoder/vp9_resize.c VP9_CX_SRCS-yes += encoder/vp9_resize.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.c VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.h +VP9_CX_SRCS-$(CONFIG_NON_GREEDY_MV) += encoder/vp9_non_greedy_mv.c +VP9_CX_SRCS-$(CONFIG_NON_GREEDY_MV) += encoder/vp9_non_greedy_mv.h VP9_CX_SRCS-yes += 
encoder/vp9_tokenize.c VP9_CX_SRCS-yes += encoder/vp9_treewriter.c @@ -116,7 +122,7 @@ endif VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm -ifeq ($(ARCH_X86_64),yes) +ifeq ($(VPX_ARCH_X86_64),yes) VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm endif |