aboutsummaryrefslogtreecommitdiff
path: root/libvpx/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'libvpx/vp9')
-rw-r--r--libvpx/vp9/common/vp9_alloccommon.c23
-rw-r--r--libvpx/vp9/common/vp9_alloccommon.h5
-rw-r--r--libvpx/vp9/common/vp9_blockd.h1
-rw-r--r--libvpx/vp9/common/vp9_mv.h2
-rw-r--r--libvpx/vp9/common/vp9_onyxc_int.h36
-rw-r--r--libvpx/vp9/common/vp9_postproc.c28
-rw-r--r--libvpx/vp9/common/vp9_postproc.h8
-rw-r--r--libvpx/vp9/common/vp9_reconinter.c4
-rw-r--r--libvpx/vp9/common/vp9_thread_common.c84
-rw-r--r--libvpx/vp9/common/vp9_thread_common.h2
-rw-r--r--libvpx/vp9/decoder/vp9_decodeframe.c55
-rw-r--r--libvpx/vp9/decoder/vp9_decodemv.c95
-rw-r--r--libvpx/vp9/decoder/vp9_decoder.h1
-rw-r--r--libvpx/vp9/decoder/vp9_detokenize.c4
-rw-r--r--libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c55
-rw-r--r--libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c3
-rw-r--r--libvpx/vp9/encoder/vp9_encodeframe.c20
-rw-r--r--libvpx/vp9/encoder/vp9_encoder.c1055
-rw-r--r--libvpx/vp9/encoder/vp9_encoder.h120
-rw-r--r--libvpx/vp9/encoder/vp9_firstpass.c959
-rw-r--r--libvpx/vp9/encoder/vp9_firstpass.h42
-rw-r--r--libvpx/vp9/encoder/vp9_lookahead.c21
-rw-r--r--libvpx/vp9/encoder/vp9_lookahead.h33
-rw-r--r--libvpx/vp9/encoder/vp9_mcomp.c567
-rw-r--r--libvpx/vp9/encoder/vp9_mcomp.h34
-rw-r--r--libvpx/vp9/encoder/vp9_non_greedy_mv.c533
-rw-r--r--libvpx/vp9/encoder/vp9_non_greedy_mv.h129
-rw-r--r--libvpx/vp9/encoder/vp9_pickmode.c22
-rw-r--r--libvpx/vp9/encoder/vp9_ratectrl.c93
-rw-r--r--libvpx/vp9/encoder/vp9_ratectrl.h4
-rw-r--r--libvpx/vp9/encoder/vp9_rd.h2
-rw-r--r--libvpx/vp9/encoder/vp9_rdopt.c39
-rw-r--r--libvpx/vp9/encoder/vp9_speed_features.c41
-rw-r--r--libvpx/vp9/encoder/vp9_speed_features.h4
-rw-r--r--libvpx/vp9/encoder/vp9_svc_layercontext.c31
-rw-r--r--libvpx/vp9/encoder/vp9_svc_layercontext.h2
-rw-r--r--libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c10
-rw-r--r--libvpx/vp9/encoder/x86/vp9_error_sse2.asm4
-rw-r--r--libvpx/vp9/encoder/x86/vp9_quantize_sse2.c2
-rw-r--r--libvpx/vp9/simple_encode.cc313
-rw-r--r--libvpx/vp9/simple_encode.h104
-rw-r--r--libvpx/vp9/vp9_common.mk1
-rw-r--r--libvpx/vp9/vp9_cx_iface.c534
-rw-r--r--libvpx/vp9/vp9_cx_iface.h48
-rw-r--r--libvpx/vp9/vp9_iface_common.c131
-rw-r--r--libvpx/vp9/vp9_iface_common.h137
-rw-r--r--libvpx/vp9/vp9cx.mk8
47 files changed, 3557 insertions, 1892 deletions
diff --git a/libvpx/vp9/common/vp9_alloccommon.c b/libvpx/vp9/common/vp9_alloccommon.c
index 7345e259b..5702dca71 100644
--- a/libvpx/vp9/common/vp9_alloccommon.c
+++ b/libvpx/vp9/common/vp9_alloccommon.c
@@ -17,17 +17,26 @@
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_onyxc_int.h"
-void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) {
+void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width,
+ int height) {
const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
+ *mi_cols = aligned_width >> MI_SIZE_LOG2;
+ *mi_rows = aligned_height >> MI_SIZE_LOG2;
+ *mi_stride = calc_mi_size(*mi_cols);
+}
- cm->mi_cols = aligned_width >> MI_SIZE_LOG2;
- cm->mi_rows = aligned_height >> MI_SIZE_LOG2;
- cm->mi_stride = calc_mi_size(cm->mi_cols);
+void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows,
+ int mi_cols) {
+ *mb_cols = (mi_cols + 1) >> 1;
+ *mb_rows = (mi_rows + 1) >> 1;
+ *mb_num = (*mb_rows) * (*mb_cols);
+}
- cm->mb_cols = (cm->mi_cols + 1) >> 1;
- cm->mb_rows = (cm->mi_rows + 1) >> 1;
- cm->MBs = cm->mb_rows * cm->mb_cols;
+void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) {
+ vp9_set_mi_size(&cm->mi_rows, &cm->mi_cols, &cm->mi_stride, width, height);
+ vp9_set_mb_size(&cm->mb_rows, &cm->mb_cols, &cm->MBs, cm->mi_rows,
+ cm->mi_cols);
}
static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) {
diff --git a/libvpx/vp9/common/vp9_alloccommon.h b/libvpx/vp9/common/vp9_alloccommon.h
index 8900038ea..90cbb093d 100644
--- a/libvpx/vp9/common/vp9_alloccommon.h
+++ b/libvpx/vp9/common/vp9_alloccommon.h
@@ -33,6 +33,11 @@ void vp9_free_postproc_buffers(struct VP9Common *cm);
int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height);
void vp9_free_state_buffers(struct VP9Common *cm);
+void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width,
+ int height);
+void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows,
+ int mi_cols);
+
void vp9_set_mb_mi(struct VP9Common *cm, int width, int height);
void vp9_swap_current_and_last_seg_map(struct VP9Common *cm);
diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h
index 2ddc0f121..6ef8127a5 100644
--- a/libvpx/vp9/common/vp9_blockd.h
+++ b/libvpx/vp9/common/vp9_blockd.h
@@ -60,6 +60,7 @@ typedef struct {
#define GOLDEN_FRAME 2
#define ALTREF_FRAME 3
#define MAX_REF_FRAMES 4
+#define MAX_INTER_REF_FRAMES 3
typedef int8_t MV_REFERENCE_FRAME;
diff --git a/libvpx/vp9/common/vp9_mv.h b/libvpx/vp9/common/vp9_mv.h
index 14dde7dd0..76f93cf0b 100644
--- a/libvpx/vp9/common/vp9_mv.h
+++ b/libvpx/vp9/common/vp9_mv.h
@@ -19,6 +19,8 @@
extern "C" {
#endif
+#define INVALID_MV 0x80008000
+
typedef struct mv {
int16_t row;
int16_t col;
diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h
index 662b8ef5e..f3942a8f0 100644
--- a/libvpx/vp9/common/vp9_onyxc_int.h
+++ b/libvpx/vp9/common/vp9_onyxc_int.h
@@ -244,14 +244,6 @@ typedef struct VP9Common {
int byte_alignment;
int skip_loop_filter;
- // Private data associated with the frame buffer callbacks.
- void *cb_priv;
- vpx_get_frame_buffer_cb_fn_t get_fb_cb;
- vpx_release_frame_buffer_cb_fn_t release_fb_cb;
-
- // Handles memory for the codec.
- InternalFrameBufferList int_frame_buffers;
-
// External BufferPool passed from outside.
BufferPool *buffer_pool;
@@ -262,6 +254,34 @@ typedef struct VP9Common {
int lf_row;
} VP9_COMMON;
+typedef struct {
+ int frame_width;
+ int frame_height;
+ int render_frame_width;
+ int render_frame_height;
+ int mi_rows;
+ int mi_cols;
+ int mb_rows;
+ int mb_cols;
+ int num_mbs;
+ vpx_bit_depth_t bit_depth;
+} FRAME_INFO;
+
+static INLINE void init_frame_info(FRAME_INFO *frame_info,
+ const VP9_COMMON *cm) {
+ frame_info->frame_width = cm->width;
+ frame_info->frame_height = cm->height;
+ frame_info->render_frame_width = cm->render_width;
+ frame_info->render_frame_height = cm->render_height;
+ frame_info->mi_cols = cm->mi_cols;
+ frame_info->mi_rows = cm->mi_rows;
+ frame_info->mb_cols = cm->mb_cols;
+ frame_info->mb_rows = cm->mb_rows;
+ frame_info->num_mbs = cm->MBs;
+ frame_info->bit_depth = cm->bit_depth;
+ // TODO(angiebird): Figure out how to get subsampling_x/y here
+}
+
static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) {
if (index < 0 || index >= FRAME_BUFFERS) return NULL;
if (cm->error.error_code != VPX_CODEC_OK) return NULL;
diff --git a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c
index 5373b0218..d2c8535b0 100644
--- a/libvpx/vp9/common/vp9_postproc.c
+++ b/libvpx/vp9/common/vp9_postproc.c
@@ -183,7 +183,8 @@ void vp9_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch, int rows, int cols,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
+static void deblock_and_de_macro_block(VP9_COMMON *cm,
+ YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *post, int q,
int low_var_thresh, int flag,
uint8_t *limits) {
@@ -216,7 +217,7 @@ static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
source->uv_height, source->uv_width, ppl);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
- vp9_deblock(source, post, q, limits);
+ vp9_deblock(cm, source, post, q, limits);
vpx_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height,
post->y_width, q2mbl(q));
vpx_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height,
@@ -226,8 +227,8 @@ static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
#endif // CONFIG_VP9_HIGHBITDEPTH
}
-void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
- uint8_t *limits) {
+void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) {
const int ppl =
(int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q + 0.0065 + 0.5);
#if CONFIG_VP9_HIGHBITDEPTH
@@ -252,9 +253,8 @@ void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
int mbr;
- const int mb_rows = src->y_height / 16;
- const int mb_cols = src->y_width / 16;
-
+ const int mb_rows = cm->mb_rows;
+ const int mb_cols = cm->mb_cols;
memset(limits, (unsigned char)ppl, 16 * mb_cols);
for (mbr = 0; mbr < mb_rows; mbr++) {
@@ -276,9 +276,9 @@ void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
#endif // CONFIG_VP9_HIGHBITDEPTH
}
-void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
- uint8_t *limits) {
- vp9_deblock(src, dst, q, limits);
+void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) {
+ vp9_deblock(cm, src, dst, q, limits);
}
static void swap_mi_and_prev_mi(VP9_COMMON *cm) {
@@ -383,21 +383,21 @@ int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest,
vpx_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int);
}
if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) {
- deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf,
+ deblock_and_de_macro_block(cm, &cm->post_proc_buffer_int, ppbuf,
q + (ppflags->deblocking_level - 5) * 10, 1, 0,
cm->postproc_state.limits);
} else if (flags & VP9D_DEBLOCK) {
- vp9_deblock(&cm->post_proc_buffer_int, ppbuf, q,
+ vp9_deblock(cm, &cm->post_proc_buffer_int, ppbuf, q,
cm->postproc_state.limits);
} else {
vpx_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf);
}
} else if (flags & VP9D_DEMACROBLOCK) {
- deblock_and_de_macro_block(cm->frame_to_show, ppbuf,
+ deblock_and_de_macro_block(cm, cm->frame_to_show, ppbuf,
q + (ppflags->deblocking_level - 5) * 10, 1, 0,
cm->postproc_state.limits);
} else if (flags & VP9D_DEBLOCK) {
- vp9_deblock(cm->frame_to_show, ppbuf, q, cm->postproc_state.limits);
+ vp9_deblock(cm, cm->frame_to_show, ppbuf, q, cm->postproc_state.limits);
} else {
vpx_yv12_copy_frame(cm->frame_to_show, ppbuf);
}
diff --git a/libvpx/vp9/common/vp9_postproc.h b/libvpx/vp9/common/vp9_postproc.h
index 67efc1b4e..bbe3aed83 100644
--- a/libvpx/vp9/common/vp9_postproc.h
+++ b/libvpx/vp9/common/vp9_postproc.h
@@ -40,11 +40,11 @@ struct VP9Common;
int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest,
vp9_ppflags_t *ppflags, int unscaled_width);
-void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
- uint8_t *limits);
+void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits);
-void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
- uint8_t *limits);
+void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits);
#ifdef __cplusplus
} // extern "C"
diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c
index 04f41e6a3..ff59ff504 100644
--- a/libvpx/vp9/common/vp9_reconinter.c
+++ b/libvpx/vp9/common/vp9_reconinter.c
@@ -96,8 +96,8 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, int bw,
const int spel_right = spel_left - SUBPEL_SHIFTS;
const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS;
const int spel_bottom = spel_top - SUBPEL_SHIFTS;
- MV clamped_mv = { src_mv->row * (1 << (1 - ss_y)),
- src_mv->col * (1 << (1 - ss_x)) };
+ MV clamped_mv = { (short)(src_mv->row * (1 << (1 - ss_y))),
+ (short)(src_mv->col * (1 << (1 - ss_x))) };
assert(ss_x <= 1);
assert(ss_y <= 1);
diff --git a/libvpx/vp9/common/vp9_thread_common.c b/libvpx/vp9/common/vp9_thread_common.c
index c79d9b7f0..b3d50162b 100644
--- a/libvpx/vp9/common/vp9_thread_common.c
+++ b/libvpx/vp9/common/vp9_thread_common.c
@@ -298,7 +298,10 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
pthread_cond_init(&lf_sync->cond[i], NULL);
}
}
- pthread_mutex_init(&lf_sync->lf_mutex, NULL);
+
+ CHECK_MEM_ERROR(cm, lf_sync->lf_mutex,
+ vpx_malloc(sizeof(*lf_sync->lf_mutex)));
+ pthread_mutex_init(lf_sync->lf_mutex, NULL);
CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex,
vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows));
@@ -339,47 +342,50 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
// Deallocate lf synchronization related mutex and data
void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
- if (lf_sync != NULL) {
+ assert(lf_sync != NULL);
+
#if CONFIG_MULTITHREAD
+ if (lf_sync->mutex != NULL) {
int i;
-
- if (lf_sync->mutex != NULL) {
- for (i = 0; i < lf_sync->rows; ++i) {
- pthread_mutex_destroy(&lf_sync->mutex[i]);
- }
- vpx_free(lf_sync->mutex);
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_mutex_destroy(&lf_sync->mutex[i]);
}
- if (lf_sync->cond != NULL) {
- for (i = 0; i < lf_sync->rows; ++i) {
- pthread_cond_destroy(&lf_sync->cond[i]);
- }
- vpx_free(lf_sync->cond);
+ vpx_free(lf_sync->mutex);
+ }
+ if (lf_sync->cond != NULL) {
+ int i;
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_cond_destroy(&lf_sync->cond[i]);
}
- if (lf_sync->recon_done_mutex != NULL) {
- int i;
- for (i = 0; i < lf_sync->rows; ++i) {
- pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
- }
- vpx_free(lf_sync->recon_done_mutex);
+ vpx_free(lf_sync->cond);
+ }
+ if (lf_sync->recon_done_mutex != NULL) {
+ int i;
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
}
+ vpx_free(lf_sync->recon_done_mutex);
+ }
- pthread_mutex_destroy(&lf_sync->lf_mutex);
- if (lf_sync->recon_done_cond != NULL) {
- int i;
- for (i = 0; i < lf_sync->rows; ++i) {
- pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
- }
- vpx_free(lf_sync->recon_done_cond);
+ if (lf_sync->lf_mutex != NULL) {
+ pthread_mutex_destroy(lf_sync->lf_mutex);
+ vpx_free(lf_sync->lf_mutex);
+ }
+ if (lf_sync->recon_done_cond != NULL) {
+ int i;
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
}
+ vpx_free(lf_sync->recon_done_cond);
+ }
#endif // CONFIG_MULTITHREAD
- vpx_free(lf_sync->lfdata);
- vpx_free(lf_sync->cur_sb_col);
- vpx_free(lf_sync->num_tiles_done);
- // clear the structure as the source of this call may be a resize in which
- // case this call will be followed by an _alloc() which may fail.
- vp9_zero(*lf_sync);
- }
+ vpx_free(lf_sync->lfdata);
+ vpx_free(lf_sync->cur_sb_col);
+ vpx_free(lf_sync->num_tiles_done);
+ // clear the structure as the source of this call may be a resize in which
+ // case this call will be followed by an _alloc() which may fail.
+ vp9_zero(*lf_sync);
}
static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
@@ -390,7 +396,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
#if CONFIG_MULTITHREAD
const int tile_cols = 1 << cm->log2_tile_cols;
- pthread_mutex_lock(&lf_sync->lf_mutex);
+ pthread_mutex_lock(lf_sync->lf_mutex);
if (cm->lf_row < max_rows) {
cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
return_val = cm->lf_row;
@@ -401,7 +407,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
cur_row += 1;
}
}
- pthread_mutex_unlock(&lf_sync->lf_mutex);
+ pthread_mutex_unlock(lf_sync->lf_mutex);
if (return_val == -1) return return_val;
@@ -411,7 +417,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
&lf_sync->recon_done_mutex[cur_row]);
}
pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]);
- pthread_mutex_lock(&lf_sync->lf_mutex);
+ pthread_mutex_lock(lf_sync->lf_mutex);
if (lf_sync->corrupted) {
int row = return_val >> MI_BLOCK_SIZE_LOG2;
pthread_mutex_lock(&lf_sync->mutex[row]);
@@ -420,7 +426,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
pthread_mutex_unlock(&lf_sync->mutex[row]);
return_val = -1;
}
- pthread_mutex_unlock(&lf_sync->lf_mutex);
+ pthread_mutex_unlock(lf_sync->lf_mutex);
#else
(void)lf_sync;
if (cm->lf_row < max_rows) {
@@ -455,9 +461,9 @@ void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) {
void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
int corrupted) {
#if CONFIG_MULTITHREAD
- pthread_mutex_lock(&lf_sync->lf_mutex);
+ pthread_mutex_lock(lf_sync->lf_mutex);
lf_sync->corrupted |= corrupted;
- pthread_mutex_unlock(&lf_sync->lf_mutex);
+ pthread_mutex_unlock(lf_sync->lf_mutex);
pthread_mutex_lock(&lf_sync->recon_done_mutex[row]);
lf_sync->num_tiles_done[row] += 1;
if (num_tiles == lf_sync->num_tiles_done[row]) {
diff --git a/libvpx/vp9/common/vp9_thread_common.h b/libvpx/vp9/common/vp9_thread_common.h
index 94c9de659..5df0117f1 100644
--- a/libvpx/vp9/common/vp9_thread_common.h
+++ b/libvpx/vp9/common/vp9_thread_common.h
@@ -40,7 +40,7 @@ typedef struct VP9LfSyncData {
int num_active_workers; // number of scheduled workers.
#if CONFIG_MULTITHREAD
- pthread_mutex_t lf_mutex;
+ pthread_mutex_t *lf_mutex;
pthread_mutex_t *recon_done_mutex;
pthread_cond_t *recon_done_cond;
#endif
diff --git a/libvpx/vp9/decoder/vp9_decodeframe.c b/libvpx/vp9/decoder/vp9_decodeframe.c
index 7d66cb2b2..e8b386994 100644
--- a/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -529,16 +529,15 @@ static void high_build_mc_border(const uint8_t *src8, int src_stride,
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
-static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
- int x0, int y0, int b_w, int b_h,
- int frame_width, int frame_height,
+static void extend_and_predict(TileWorkerData *twd, const uint8_t *buf_ptr1,
+ int pre_buf_stride, int x0, int y0, int b_w,
+ int b_h, int frame_width, int frame_height,
int border_offset, uint8_t *const dst,
int dst_buf_stride, int subpel_x, int subpel_y,
const InterpKernel *kernel,
const struct scale_factors *sf, MACROBLOCKD *xd,
int w, int h, int ref, int xs, int ys) {
- DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]);
-
+ uint16_t *mc_buf_high = twd->extend_and_predict_buf;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, x0, y0,
b_w, b_h, frame_width, frame_height);
@@ -554,15 +553,15 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
}
}
#else
-static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
- int x0, int y0, int b_w, int b_h,
- int frame_width, int frame_height,
+static void extend_and_predict(TileWorkerData *twd, const uint8_t *buf_ptr1,
+ int pre_buf_stride, int x0, int y0, int b_w,
+ int b_h, int frame_width, int frame_height,
int border_offset, uint8_t *const dst,
int dst_buf_stride, int subpel_x, int subpel_y,
const InterpKernel *kernel,
const struct scale_factors *sf, int w, int h,
int ref, int xs, int ys) {
- DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
+ uint8_t *mc_buf = (uint8_t *)twd->extend_and_predict_buf;
const uint8_t *buf_ptr;
build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, x0, y0, b_w, b_h,
@@ -575,8 +574,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
#endif // CONFIG_VP9_HIGHBITDEPTH
static void dec_build_inter_predictors(
- MACROBLOCKD *xd, int plane, int bw, int bh, int x, int y, int w, int h,
- int mi_x, int mi_y, const InterpKernel *kernel,
+ TileWorkerData *twd, MACROBLOCKD *xd, int plane, int bw, int bh, int x,
+ int y, int w, int h, int mi_x, int mi_y, const InterpKernel *kernel,
const struct scale_factors *sf, struct buf_2d *pre_buf,
struct buf_2d *dst_buf, const MV *mv, RefCntBuffer *ref_frame_buf,
int is_scaled, int ref) {
@@ -687,9 +686,9 @@ static void dec_build_inter_predictors(
const int b_h = y1 - y0 + 1;
const int border_offset = y_pad * 3 * b_w + x_pad * 3;
- extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, frame_width,
- frame_height, border_offset, dst, dst_buf->stride,
- subpel_x, subpel_y, kernel, sf,
+ extend_and_predict(twd, buf_ptr1, buf_stride, x0, y0, b_w, b_h,
+ frame_width, frame_height, border_offset, dst,
+ dst_buf->stride, subpel_x, subpel_y, kernel, sf,
#if CONFIG_VP9_HIGHBITDEPTH
xd,
#endif
@@ -712,7 +711,8 @@ static void dec_build_inter_predictors(
#endif // CONFIG_VP9_HIGHBITDEPTH
}
-static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
+static void dec_build_inter_predictors_sb(TileWorkerData *twd,
+ VP9Decoder *const pbi,
MACROBLOCKD *xd, int mi_row,
int mi_col) {
int plane;
@@ -755,10 +755,10 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
for (y = 0; y < num_4x4_h; ++y) {
for (x = 0; x < num_4x4_w; ++x) {
const MV mv = average_split_mvs(pd, mi, ref, i++);
- dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 4 * x, 4 * y,
- 4, 4, mi_x, mi_y, kernel, sf, pre_buf,
- dst_buf, &mv, ref_frame_buf, is_scaled,
- ref);
+ dec_build_inter_predictors(twd, xd, plane, n4w_x4, n4h_x4, 4 * x,
+ 4 * y, 4, 4, mi_x, mi_y, kernel, sf,
+ pre_buf, dst_buf, &mv, ref_frame_buf,
+ is_scaled, ref);
}
}
}
@@ -772,7 +772,7 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
const int n4w_x4 = 4 * num_4x4_w;
const int n4h_x4 = 4 * num_4x4_h;
struct buf_2d *const pre_buf = &pd->pre[ref];
- dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4,
+ dec_build_inter_predictors(twd, xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4,
n4h_x4, mi_x, mi_y, kernel, sf, pre_buf,
dst_buf, &mv, ref_frame_buf, is_scaled, ref);
}
@@ -964,7 +964,7 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
}
} else {
// Prediction
- dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
+ dec_build_inter_predictors_sb(twd, pbi, xd, mi_row, mi_col);
#if CONFIG_MISMATCH_DEBUG
{
int plane;
@@ -1048,7 +1048,7 @@ static void recon_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
predict_and_reconstruct_intra_block_row_mt);
} else {
// Prediction
- dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
+ dec_build_inter_predictors_sb(twd, pbi, xd, mi_row, mi_col);
// Reconstruction
if (!mi->skip) {
@@ -1733,9 +1733,9 @@ static int lpf_map_write_check(VP9LfSync *lf_sync, int row, int num_tile_cols) {
int return_val = 0;
#if CONFIG_MULTITHREAD
int corrupted;
- pthread_mutex_lock(&lf_sync->lf_mutex);
+ pthread_mutex_lock(lf_sync->lf_mutex);
corrupted = lf_sync->corrupted;
- pthread_mutex_unlock(&lf_sync->lf_mutex);
+ pthread_mutex_unlock(lf_sync->lf_mutex);
if (!corrupted) {
pthread_mutex_lock(&lf_sync->recon_done_mutex[row]);
lf_sync->num_tiles_done[row] += 1;
@@ -1905,6 +1905,7 @@ static int row_decode_worker_hook(void *arg1, void *arg2) {
LFWorkerData *lf_data = thread_data->lf_data;
VP9LfSync *lf_sync = thread_data->lf_sync;
volatile int corrupted = 0;
+ TileWorkerData *volatile tile_data_recon = NULL;
while (!vp9_jobq_dequeue(&row_mt_worker_data->jobq, &job, sizeof(job), 1)) {
int mi_col;
@@ -1921,9 +1922,10 @@ static int row_decode_worker_hook(void *arg1, void *arg2) {
} else if (job.job_type == RECON_JOB) {
const int cur_sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
const int is_last_row = sb_rows - 1 == cur_sb_row;
- TileWorkerData twd_recon;
- TileWorkerData *const tile_data_recon = &twd_recon;
int mi_col_start, mi_col_end;
+ if (!tile_data_recon)
+ CHECK_MEM_ERROR(cm, tile_data_recon,
+ vpx_memalign(32, sizeof(TileWorkerData)));
tile_data_recon->xd = pbi->mb;
vp9_tile_init(&tile_data_recon->xd.tile, cm, 0, job.tile_col);
@@ -2006,6 +2008,7 @@ static int row_decode_worker_hook(void *arg1, void *arg2) {
}
}
+ vpx_free(tile_data_recon);
return !corrupted;
}
diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c
index 943fe478a..49c675394 100644
--- a/libvpx/vp9/decoder/vp9_decodemv.c
+++ b/libvpx/vp9/decoder/vp9_decodemv.c
@@ -444,17 +444,6 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
}
}
-static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv,
- int refmv_count) {
- int i;
-
- // Make sure all the candidates are properly clamped etc
- for (i = 0; i < refmv_count; ++i) {
- lower_mv_precision(&mvlist[i].as_mv, allow_hp);
- *best_mv = mvlist[i];
- }
-}
-
// This macro is used to add a motion vector mv_ref list if it isn't
// already in the list. If it's the second motion vector or early_break
// it will also skip all additional processing and jump to Done!
@@ -494,7 +483,7 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame,
const POSITION *const mv_ref_search,
int_mv *mv_ref_list, int mi_row, int mi_col,
- int block, int is_sub8x8) {
+ int block) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
int different_ref_found = 0;
@@ -511,7 +500,7 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
i = 0;
- if (is_sub8x8) {
+ if (block >= 0) {
// If the size < 8x8 we get the mv from the bmi substructure for the
// nearest two blocks.
for (i = 0; i < 2; ++i) {
@@ -628,19 +617,22 @@ static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
assert(MAX_MV_REF_CANDIDATES == 2);
- refmv_count =
- dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
- mv_list, mi_row, mi_col, block, 1);
-
switch (block) {
- case 0: best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; break;
+ case 0:
+ refmv_count =
+ dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
+ mv_list, mi_row, mi_col, block);
+ best_sub8x8->as_int = mv_list[refmv_count - 1].as_int;
+ break;
case 1:
case 2:
if (b_mode == NEARESTMV) {
best_sub8x8->as_int = bmi[0].as_mv[ref].as_int;
} else {
+ dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
+ mv_list, mi_row, mi_col, block);
best_sub8x8->as_int = 0;
- for (n = 0; n < refmv_count; ++n)
+ for (n = 0; n < 2; ++n)
if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) {
best_sub8x8->as_int = mv_list[n].as_int;
break;
@@ -651,15 +643,20 @@ static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
if (b_mode == NEARESTMV) {
best_sub8x8->as_int = bmi[2].as_mv[ref].as_int;
} else {
- int_mv candidates[2 + MAX_MV_REF_CANDIDATES];
- candidates[0] = bmi[1].as_mv[ref];
- candidates[1] = bmi[0].as_mv[ref];
- candidates[2] = mv_list[0];
- candidates[3] = mv_list[1];
best_sub8x8->as_int = 0;
- for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n)
- if (bmi[2].as_mv[ref].as_int != candidates[n].as_int) {
- best_sub8x8->as_int = candidates[n].as_int;
+ if (bmi[2].as_mv[ref].as_int != bmi[1].as_mv[ref].as_int) {
+ best_sub8x8->as_int = bmi[1].as_mv[ref].as_int;
+ break;
+ }
+ if (bmi[2].as_mv[ref].as_int != bmi[0].as_mv[ref].as_int) {
+ best_sub8x8->as_int = bmi[0].as_mv[ref].as_int;
+ break;
+ }
+ dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
+ mv_list, mi_row, mi_col, block);
+ for (n = 0; n < 2; ++n)
+ if (bmi[2].as_mv[ref].as_int != mv_list[n].as_int) {
+ best_sub8x8->as_int = mv_list[n].as_int;
break;
}
}
@@ -715,26 +712,6 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
} else {
if (bsize >= BLOCK_8X8)
mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
- else
- // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV.
- // Setting mode to NEARESTMV forces the search to stop after the nearestmv
- // has been found. After b_modes have been read, mode will be overwritten
- // by the last b_mode.
- mi->mode = NEARESTMV;
-
- if (mi->mode != ZEROMV) {
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- int_mv tmp_mvs[MAX_MV_REF_CANDIDATES];
- const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
- int refmv_count;
-
- refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search,
- tmp_mvs, mi_row, mi_col, -1, 0);
-
- dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref],
- refmv_count);
- }
- }
}
mi->interp_filter = (cm->interp_filter == SWITCHABLE)
@@ -746,6 +723,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
const int num_4x4_h = 1 << xd->bmode_blocks_hl;
int idx, idy;
PREDICTION_MODE b_mode;
+ int got_mv_refs_for_new = 0;
int_mv best_sub8x8[2];
const uint32_t invalid_mv = 0x80008000;
// Initialize the 2nd element as even though it won't be used meaningfully
@@ -760,6 +738,18 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
for (ref = 0; ref < 1 + is_compound; ++ref)
append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref,
mi_row, mi_col, &best_sub8x8[ref]);
+ } else if (b_mode == NEWMV && !got_mv_refs_for_new) {
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ int_mv tmp_mvs[MAX_MV_REF_CANDIDATES];
+ const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
+
+ dec_find_mv_refs(cm, xd, NEWMV, frame, mv_ref_search, tmp_mvs,
+ mi_row, mi_col, -1);
+
+ lower_mv_precision(&tmp_mvs[0].as_mv, allow_hp);
+ best_ref_mvs[ref] = tmp_mvs[0];
+ got_mv_refs_for_new = 1;
+ }
}
if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs,
@@ -777,6 +767,17 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
copy_mv_pair(mi->mv, mi->bmi[3].as_mv);
} else {
+ if (mi->mode != ZEROMV) {
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ int_mv tmp_mvs[MAX_MV_REF_CANDIDATES];
+ const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
+ int refmv_count =
+ dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, tmp_mvs,
+ mi_row, mi_col, -1);
+ lower_mv_precision(&tmp_mvs[refmv_count - 1].as_mv, allow_hp);
+ best_ref_mvs[ref] = tmp_mvs[refmv_count - 1];
+ }
+ }
xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs,
best_ref_mvs, is_compound, allow_hp, r);
}
diff --git a/libvpx/vp9/decoder/vp9_decoder.h b/libvpx/vp9/decoder/vp9_decoder.h
index 4a22aa6b5..b0ef83c73 100644
--- a/libvpx/vp9/decoder/vp9_decoder.h
+++ b/libvpx/vp9/decoder/vp9_decoder.h
@@ -55,6 +55,7 @@ typedef struct TileWorkerData {
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
+ DECLARE_ALIGNED(16, uint16_t, extend_and_predict_buf[80 * 2 * 80 * 2]);
struct vpx_internal_error_info error_info;
} TileWorkerData;
diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c
index e250a5a35..c2e6b3d54 100644
--- a/libvpx/vp9/decoder/vp9_detokenize.c
+++ b/libvpx/vp9/decoder/vp9_detokenize.c
@@ -243,9 +243,9 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
#endif // CONFIG_VP9_HIGHBITDEPTH
#else
if (read_bool(r, 128, &value, &count, &range)) {
- dqcoeff[scan[c]] = -v;
+ dqcoeff[scan[c]] = (tran_low_t)-v;
} else {
- dqcoeff[scan[c]] = v;
+ dqcoeff[scan[c]] = (tran_low_t)v;
}
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
++c;
diff --git a/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c b/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
index 8b62b450c..d75a48179 100644
--- a/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -26,6 +26,22 @@
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/vpx_dsp_common.h"
+static INLINE void calculate_dqcoeff_and_store(const int16x8_t qcoeff,
+ const int16x8_t dequant,
+ tran_low_t *dqcoeff) {
+ const int32x4_t dqcoeff_0 =
+ vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant));
+ const int32x4_t dqcoeff_1 =
+ vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant));
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ vst1q_s32(dqcoeff, dqcoeff_0);
+ vst1q_s32(dqcoeff + 4, dqcoeff_1);
+#else
+ vst1q_s16(dqcoeff, vcombine_s16(vmovn_s32(dqcoeff_0), vmovn_s32(dqcoeff_1)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
+
void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
@@ -55,7 +71,8 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+ const int16x8_t v_abs = vabsq_s16(v_coeff);
+ const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
@@ -67,10 +84,9 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
- const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+ calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr);
v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
store_s16q_to_tran_low(qcoeff_ptr, v_qcoeff);
- store_s16q_to_tran_low(dqcoeff_ptr, v_dqcoeff);
v_round = vmovq_n_s16(round_ptr[1]);
v_quant = vmovq_n_s16(quant_ptr[1]);
v_dequant = vmovq_n_s16(dequant_ptr[1]);
@@ -80,7 +96,8 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr + i);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+ const int16x8_t v_abs = vabsq_s16(v_coeff);
+ const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
@@ -92,10 +109,9 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
- const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+ calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr + i);
v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff);
- store_s16q_to_tran_low(dqcoeff_ptr + i, v_dqcoeff);
}
#ifdef __aarch64__
*eob_ptr = vmaxvq_s16(v_eobmax_76543210);
@@ -146,9 +162,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16x8_t dequant_mask =
vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));
- int16x8_t qcoeff = vaddq_s16(coeff_abs, round);
+ int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);
int32x4_t dqcoeff_0, dqcoeff_1;
- int16x8_t dqcoeff;
uint16x8_t eob_max;
(void)scan;
(void)count;
@@ -170,13 +185,17 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
// Add 1 if negative to round towards zero because the C uses division.
dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0));
dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1));
-
- dqcoeff = vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
+#if CONFIG_VP9_HIGHBITDEPTH
+ vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1));
+ vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1));
+#else
+ store_s16q_to_tran_low(dqcoeff_ptr, vcombine_s16(vshrn_n_s32(dqcoeff_0, 1),
+ vshrn_n_s32(dqcoeff_1, 1)));
+#endif
eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan);
store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
- store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
iscan += 8;
coeff_ptr += 8;
@@ -200,9 +219,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16x8_t dequant_mask =
vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));
- int16x8_t qcoeff = vaddq_s16(coeff_abs, round);
+ int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);
int32x4_t dqcoeff_0, dqcoeff_1;
- int16x8_t dqcoeff;
qcoeff = vqdmulhq_s16(qcoeff, quant);
qcoeff = veorq_s16(qcoeff, coeff_sign);
@@ -215,14 +233,19 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0));
dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1));
- dqcoeff =
- vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
+#if CONFIG_VP9_HIGHBITDEPTH
+ vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1));
+ vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1));
+#else
+ store_s16q_to_tran_low(
+ dqcoeff_ptr,
+ vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)));
+#endif
eob_max =
vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan));
store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
- store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
iscan += 8;
coeff_ptr += 8;
diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
index adb12c10c..858a41654 100644
--- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -187,7 +187,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, MODE_INFO *const mi,
// If this block is labeled for refresh, check if we should reset the
// segment_id.
- if (cyclic_refresh_segment_id_boosted(mi->segment_id)) {
+ if (cpi->sf.use_nonrd_pick_mode &&
+ cyclic_refresh_segment_id_boosted(mi->segment_id)) {
mi->segment_id = refresh_this_block;
// Reset segment_id if it will be skipped.
if (skip) mi->segment_id = CR_SEGMENT_ID_BASE;
diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c
index d47b411fa..9eddf545e 100644
--- a/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -1214,8 +1214,8 @@ static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
if (is_key_frame) return;
- // For speed >= 8, avoid the chroma check if y_sad is above threshold.
- if (cpi->oxcf.speed >= 8) {
+ // For speed > 8, avoid the chroma check if y_sad is above threshold.
+ if (cpi->oxcf.speed > 8) {
if (y_sad > cpi->vbp_thresholds[1] &&
(!cpi->noise_estimate.enabled ||
vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium))
@@ -4248,13 +4248,21 @@ static int rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (cpi->sf.prune_ref_frame_for_rect_partitions) {
uint8_t used_frames;
used_frames = ref_frames_used[0] | ref_frames_used[1];
- if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
+ if (used_frames) {
+ pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames & 0xff;
+ }
used_frames = ref_frames_used[2] | ref_frames_used[3];
- if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
+ if (used_frames) {
+ pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames & 0xff;
+ }
used_frames = ref_frames_used[0] | ref_frames_used[2];
- if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
+ if (used_frames) {
+ pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames & 0xff;
+ }
used_frames = ref_frames_used[1] | ref_frames_used[3];
- if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
+ if (used_frames) {
+ pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames & 0xff;
+ }
}
{
diff --git a/libvpx/vp9/encoder/vp9_encoder.c b/libvpx/vp9/encoder/vp9_encoder.c
index 7f82a470b..b1a81c04a 100644
--- a/libvpx/vp9/encoder/vp9_encoder.c
+++ b/libvpx/vp9/encoder/vp9_encoder.c
@@ -80,6 +80,7 @@
#include "vp9/encoder/vp9_speed_features.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
#include "vp9/encoder/vp9_temporal_filter.h"
+#include "vp9/vp9_cx_iface.h"
#define AM_SEGMENT_ID_INACTIVE 7
#define AM_SEGMENT_ID_ACTIVE 0
@@ -459,8 +460,8 @@ static int compute_context_model_diff(const VP9_COMMON *const cm) {
#endif // !CONFIG_REALTIME_ONLY
// Test for whether to calculate metrics for the frame.
-static int is_psnr_calc_enabled(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
+static int is_psnr_calc_enabled(const VP9_COMP *cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
@@ -822,8 +823,28 @@ static void setup_frame(VP9_COMP *cpi) {
// layer ARF case as well.
if (cpi->multi_layer_arf && !cpi->use_svc) {
GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- cm->frame_context_idx = clamp(gf_group->layer_depth[gf_group->index] - 1, 0,
- FRAME_CONTEXTS - 1);
+ const int gf_group_index = gf_group->index;
+ const int boost_frame =
+ !cpi->rc.is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
+
+ // frame_context_idx Frame Type
+ // 0 Intra only frame, base layer ARF
+ // 1 ARFs with layer depth = 2,3
+ // 2 ARFs with layer depth > 3
+ // 3 Non-boosted frames
+ if (frame_is_intra_only(cm)) {
+ cm->frame_context_idx = 0;
+ } else if (boost_frame) {
+ if (gf_group->rf_level[gf_group_index] == GF_ARF_STD)
+ cm->frame_context_idx = 0;
+ else if (gf_group->layer_depth[gf_group_index] <= 3)
+ cm->frame_context_idx = 1;
+ else
+ cm->frame_context_idx = 2;
+ } else {
+ cm->frame_context_idx = 3;
+ }
}
if (cm->frame_type == KEY_FRAME) {
@@ -1436,7 +1457,6 @@ static void init_level_constraint(LevelConstraint *lc) {
lc->level_index = -1;
lc->max_cpb_size = INT_MAX;
lc->max_frame_size = INT_MAX;
- lc->rc_config_updated = 0;
lc->fail_flag = 0;
}
@@ -1448,7 +1468,7 @@ static void set_level_constraint(LevelConstraint *ls, int8_t level_index) {
}
}
-static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
+static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
VP9_COMMON *const cm = &cpi->common;
cpi->oxcf = *oxcf;
@@ -1513,13 +1533,15 @@ static void set_rc_buffer_sizes(RATE_CONTROL *rc,
}
#if CONFIG_VP9_HIGHBITDEPTH
+// TODO(angiebird): make sdx8f available for highbitdepth if needed
#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
cpi->fn_ptr[BT].vf = VF; \
cpi->fn_ptr[BT].svf = SVF; \
cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].sdx4df = SDX4DF;
+ cpi->fn_ptr[BT].sdx4df = SDX4DF; \
+ cpi->fn_ptr[BT].sdx8f = NULL;
#define MAKE_BFP_SAD_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *src_ptr, \
@@ -2137,7 +2159,112 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
} while (++i <= MV_MAX);
}
-VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
+static void init_ref_frame_bufs(VP9_COMMON *cm) {
+ int i;
+ BufferPool *const pool = cm->buffer_pool;
+ cm->new_fb_idx = INVALID_IDX;
+ for (i = 0; i < REF_FRAMES; ++i) {
+ cm->ref_frame_map[i] = INVALID_IDX;
+ }
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
+ pool->frame_bufs[i].ref_count = 0;
+ }
+}
+
+static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth,
+ int subsampling_x, int subsampling_y) {
+ VP9_COMMON *const cm = &cpi->common;
+#if !CONFIG_VP9_HIGHBITDEPTH
+ (void)use_highbitdepth;
+ assert(use_highbitdepth == 0);
+#endif
+
+ if (!cpi->initial_width ||
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth != use_highbitdepth ||
+#endif
+ cm->subsampling_x != subsampling_x ||
+ cm->subsampling_y != subsampling_y) {
+ cm->subsampling_x = subsampling_x;
+ cm->subsampling_y = subsampling_y;
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth = use_highbitdepth;
+#endif
+ alloc_util_frame_buffers(cpi);
+ cpi->initial_width = cm->width;
+ cpi->initial_height = cm->height;
+ cpi->initial_mbs = cm->MBs;
+ }
+}
+
+// TODO(angiebird): Check whether we can move this function to vpx_image.c
+static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt,
+ unsigned int *subsampling_x,
+ unsigned int *subsampling_y) {
+ switch (fmt) {
+ case VPX_IMG_FMT_I420:
+ case VPX_IMG_FMT_YV12:
+ case VPX_IMG_FMT_I422:
+ case VPX_IMG_FMT_I42016:
+ case VPX_IMG_FMT_I42216: *subsampling_x = 1; break;
+ default: *subsampling_x = 0; break;
+ }
+
+ switch (fmt) {
+ case VPX_IMG_FMT_I420:
+ case VPX_IMG_FMT_I440:
+ case VPX_IMG_FMT_YV12:
+ case VPX_IMG_FMT_I42016:
+ case VPX_IMG_FMT_I44016: *subsampling_y = 1; break;
+ default: *subsampling_y = 0; break;
+ }
+}
+
+// TODO(angiebird): Check whether we can move this function to vpx_image.c
+static INLINE int vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) {
+ return fmt & VPX_IMG_FMT_HIGHBITDEPTH;
+}
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+static void setup_denoiser_buffer(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ if (cpi->oxcf.noise_sensitivity > 0 &&
+ !cpi->denoiser.frame_buffer_initialized) {
+ if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
+ cpi->oxcf.noise_sensitivity, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VP9_ENC_BORDER_IN_PIXELS))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate denoiser");
+ }
+}
+#endif
+
+void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) {
+ const VP9EncoderConfig *oxcf = &cpi->oxcf;
+ unsigned int subsampling_x, subsampling_y;
+ const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt);
+ vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y);
+
+ update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ setup_denoiser_buffer(cpi);
+#endif
+
+ assert(cpi->lookahead == NULL);
+ cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x,
+ subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ use_highbitdepth,
+#endif
+ oxcf->lag_in_frames);
+ alloc_raw_frame_buffers(cpi);
+}
+
+VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
BufferPool *const pool) {
unsigned int i;
VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
@@ -2170,10 +2297,13 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->resize_buffer_underflow = 0;
cpi->use_skin_detection = 0;
cpi->common.buffer_pool = pool;
+ init_ref_frame_bufs(cm);
cpi->force_update_segmentation = 0;
init_config(cpi, oxcf);
+ cpi->frame_info = vp9_get_frame_info(oxcf);
+
vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
cm->current_video_frame = 0;
@@ -2341,6 +2471,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
const int packets_in_layer = (int)last_packet_for_layer->count + 1;
if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
+ int num_frames;
LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
vpx_free(lc->rc_twopass_stats_in.buf);
@@ -2352,6 +2483,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
lc->twopass.stats_in = lc->twopass.stats_in_start;
lc->twopass.stats_in_end =
lc->twopass.stats_in_start + packets_in_layer - 1;
+ // Note the last packet is cumulative first pass stats.
+ // So the number of frames is packet number minus one
+ num_frames = packets_in_layer - 1;
+ fps_init_first_pass_info(&lc->twopass.first_pass_info,
+ lc->rc_twopass_stats_in.buf, num_frames);
stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
}
}
@@ -2367,6 +2503,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
vp9_init_second_pass_spatial_svc(cpi);
} else {
+ int num_frames;
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
const size_t psz = cpi->common.MBs * sizeof(uint8_t);
@@ -2383,6 +2520,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
cpi->twopass.stats_in = cpi->twopass.stats_in_start;
cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
+ // Note the last packet is cumulative first pass stats.
+ // So the number of frames is packet number minus one
+ num_frames = packets - 1;
+ fps_init_first_pass_info(&cpi->twopass.first_pass_info,
+ oxcf->two_pass_stats_in.buf, num_frames);
vp9_init_second_pass(cpi);
}
@@ -2409,7 +2551,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->kmeans_data_arr_alloc = 0;
#if CONFIG_NON_GREEDY_MV
- cpi->feature_score_loc_alloc = 0;
cpi->tpl_ready = 0;
#endif // CONFIG_NON_GREEDY_MV
for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
@@ -2418,62 +2559,67 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
cpi->source_var_thresh = 0;
cpi->frames_till_next_var_check = 0;
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
+ cpi->fn_ptr[BT].sdf = SDF; \
+ cpi->fn_ptr[BT].sdaf = SDAF; \
+ cpi->fn_ptr[BT].vf = VF; \
+ cpi->fn_ptr[BT].svf = SVF; \
+ cpi->fn_ptr[BT].svaf = SVAF; \
+ cpi->fn_ptr[BT].sdx4df = SDX4DF; \
+ cpi->fn_ptr[BT].sdx8f = SDX8F;
-#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
- cpi->fn_ptr[BT].sdf = SDF; \
- cpi->fn_ptr[BT].sdaf = SDAF; \
- cpi->fn_ptr[BT].vf = VF; \
- cpi->fn_ptr[BT].svf = SVF; \
- cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].sdx4df = SDX4DF;
-
+ // TODO(angiebird): make sdx8f available for every block size
BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
- vpx_sad32x16x4d)
+ vpx_sad32x16x4d, NULL)
BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
- vpx_sad16x32x4d)
+ vpx_sad16x32x4d, NULL)
BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
- vpx_sad64x32x4d)
+ vpx_sad64x32x4d, NULL)
BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
- vpx_sad32x64x4d)
+ vpx_sad32x64x4d, NULL)
BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
- vpx_sad32x32x4d)
+ vpx_sad32x32x4d, vpx_sad32x32x8)
BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
- vpx_sad64x64x4d)
+ vpx_sad64x64x4d, NULL)
BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
- vpx_sad16x16x4d)
+ vpx_sad16x16x4d, vpx_sad16x16x8)
BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
- vpx_sad16x8x4d)
+ vpx_sad16x8x4d, vpx_sad16x8x8)
BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
- vpx_sad8x16x4d)
+ vpx_sad8x16x4d, vpx_sad8x16x8)
BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
- vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d)
+ vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
+ vpx_sad8x8x8)
BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
- vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d)
+ vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
+ NULL)
BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
- vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d)
+ vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
+ NULL)
BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
- vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d)
+ vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
+ vpx_sad4x4x8)
#if CONFIG_VP9_HIGHBITDEPTH
highbd_set_var_fns(cpi);
@@ -2501,6 +2647,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cm->error.setjmp = 0;
+#if CONFIG_RATE_CTRL
+ encode_command_init(&cpi->encode_command);
+#endif
+
return cpi;
}
@@ -2511,9 +2661,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
#endif // CONFIG_INTERNAL_STATS
+static void free_tpl_buffer(VP9_COMP *cpi);
+
void vp9_remove_compressor(VP9_COMP *cpi) {
VP9_COMMON *cm;
- unsigned int i, frame;
+ unsigned int i;
int t;
if (!cpi) return;
@@ -2586,9 +2738,16 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
}
- fprintf(f, "%s\t Time\tRcErr\tAbsErr\n", headings);
- fprintf(f, "%s\t%8.0f\t%7.2f\t%7.2f\n", results, total_encode_time,
- rate_err, fabs(rate_err));
+ SNPRINT(headings, "\t Time\tRcErr\tAbsErr");
+ SNPRINT2(results, "\t%8.0f", total_encode_time);
+ SNPRINT2(results, "\t%7.2f", rate_err);
+ SNPRINT2(results, "\t%7.2f", fabs(rate_err));
+
+ fprintf(f, "%s\tAPsnr611\n", headings);
+ fprintf(
+ f, "%s\t%7.3f\n", results,
+ (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) /
+ (cpi->count * 8));
}
fclose(f);
@@ -2618,27 +2777,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
vpx_free(cpi->kmeans_data_arr);
}
-#if CONFIG_NON_GREEDY_MV
- vpx_free(cpi->feature_score_loc_arr);
- vpx_free(cpi->feature_score_loc_sort);
- vpx_free(cpi->feature_score_loc_heap);
- vpx_free(cpi->select_mv_arr);
-#endif
- for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
-#if CONFIG_NON_GREEDY_MV
- int rf_idx;
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
- int sqr_bsize;
- for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
- vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]);
- }
- vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
- vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
- }
-#endif
- vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
- cpi->tpl_stats[frame].is_valid = 0;
- }
+ free_tpl_buffer(cpi);
for (t = 0; t < cpi->num_workers; ++t) {
VPxWorker *const worker = &cpi->workers[t];
@@ -2719,30 +2858,19 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
#endif
}
-static void generate_psnr_packet(VP9_COMP *cpi) {
- struct vpx_codec_cx_pkt pkt;
- int i;
- PSNR_STATS psnr;
+int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) {
+ if (is_psnr_calc_enabled(cpi)) {
#if CONFIG_VP9_HIGHBITDEPTH
- vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr,
- cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
+ vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr,
+ cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
#else
- vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr);
+ vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr);
#endif
-
- for (i = 0; i < 4; ++i) {
- pkt.data.psnr.samples[i] = psnr.samples[i];
- pkt.data.psnr.sse[i] = psnr.sse[i];
- pkt.data.psnr.psnr[i] = psnr.psnr[i];
+ return 1;
+ } else {
+ vp9_zero(*psnr);
+ return 0;
}
- pkt.kind = VPX_CODEC_PSNR_PKT;
- if (cpi->use_svc)
- cpi->svc
- .layer_context[cpi->svc.spatial_layer_id *
- cpi->svc.number_temporal_layers]
- .psnr_pkt = pkt.data.psnr;
- else
- vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
}
int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
@@ -3572,29 +3700,12 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
vpx_calloc(cpi->un_scaled_source->y_width,
sizeof(*cpi->common.postproc_state.limits));
}
- vp9_denoise(cpi->Source, cpi->Source, l, cpi->common.postproc_state.limits);
+ vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l,
+ cpi->common.postproc_state.limits);
}
#endif // CONFIG_VP9_POSTPROC
}
-#if CONFIG_VP9_TEMPORAL_DENOISING
-static void setup_denoiser_buffer(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- if (cpi->oxcf.noise_sensitivity > 0 &&
- !cpi->denoiser.frame_buffer_initialized) {
- if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
- cpi->oxcf.noise_sensitivity, cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VP9_ENC_BORDER_IN_PIXELS))
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate denoiser");
- }
-}
-#endif
-
static void init_motion_estimation(VP9_COMP *cpi) {
int y_stride = cpi->scaled_source.y_stride;
@@ -4175,6 +4286,14 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
vp9_scale_references(cpi);
}
+#if CONFIG_RATE_CTRL
+ // TODO(angiebird): This is a hack for making sure the encoder uses the
+ // external_quantize_index exactly. Avoid this kind of hack later.
+ if (cpi->encode_command.use_external_quantize_index) {
+ q = cpi->encode_command.external_quantize_index;
+ }
+#endif
+
vp9_set_quantizer(cm, q);
if (loop_count == 0) setup_frame(cpi);
@@ -4213,6 +4332,16 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
}
+#if CONFIG_RATE_CTRL
+ // This part needs to be after save_coding_context() because
+ // restore_coding_context will be called at the end of this function.
+ // TODO(angiebird): This is a hack for making sure the encoder uses the
+ // external_quantize_index exactly. Avoid this kind of hack later.
+ if (cpi->encode_command.use_external_quantize_index) {
+ break;
+ }
+#endif
+
if (oxcf->rc_mode == VPX_Q) {
loop = 0;
} else {
@@ -4389,7 +4518,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
}
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF)
- if (loop || !enable_acl) restore_coding_context(cpi);
+ if (loop) restore_coding_context(cpi);
} while (loop);
#ifdef AGGRESSIVE_VBR
@@ -4415,13 +4544,11 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
// Skip recoding, if model diff is below threshold
const int thresh = compute_context_model_thresh(cpi);
const int diff = compute_context_model_diff(cm);
- if (diff < thresh) {
- vpx_clear_system_state();
- restore_coding_context(cpi);
- return;
+ if (diff >= thresh) {
+ vp9_encode_frame(cpi);
}
-
- vp9_encode_frame(cpi);
+ }
+ if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
vpx_clear_system_state();
restore_coding_context(cpi);
}
@@ -4756,17 +4883,6 @@ static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
}
}
-// Implementation and modifications of C. Yeo, H. L. Tan, and Y. H. Tan, "On
-// rate distortion optimization using SSIM," Circuits and Systems for Video
-// Technology, IEEE Transactions on, vol. 23, no. 7, pp. 1170-1181, 2013.
-// SSIM_VAR_SCALE defines the strength of the bias towards SSIM in RDO.
-// Some sample values are:
-// (for midres test set)
-// SSIM_VAR_SCALE avg_psnr ssim ms_ssim
-// 8.0 9.421 -5.537 -6.898
-// 16.0 4.703 -5.378 -6.238
-// 32.0 1.929 -4.308 -4.807
-#define SSIM_VAR_SCALE 16.0
static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
ThreadData *td = &cpi->td;
@@ -4783,19 +4899,6 @@ static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
double log_sum = 0.0;
int row, col;
-#if CONFIG_VP9_HIGHBITDEPTH
- double c2;
- if (xd->bd == 10) {
- c2 = 941.8761; // (.03*1023)^2
- } else if (xd->bd == 12) {
- c2 = 15092.1225; // (.03*4095)^2
- } else {
- c2 = 58.5225; // (.03*255)^2
- }
-#else
- const double c2 = 58.5225; // (.03*255)^2
-#endif
-
// Loop through each 64x64 block.
for (row = 0; row < num_rows; ++row) {
for (col = 0; col < num_cols; ++col) {
@@ -4817,19 +4920,22 @@ static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
// In order to make SSIM_VAR_SCALE in a same scale for both 8 bit
// and high bit videos, the variance needs to be divided by 2.0 or
// 64.0 separately.
+ // TODO(sdeng): need to tune for 12bit videos.
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
- var +=
- vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd) / 2.0;
+ var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
else
#endif
- var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8) / 64.0;
+ var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);
num_of_var += 1.0;
}
}
- var = var / num_of_var / SSIM_VAR_SCALE;
- var = 2.0 * var + c2;
+ var = var / num_of_var / 64.0;
+
+ // Curve fitting with an exponential model on all 16x16 blocks from the
+ // Midres dataset.
+ var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
cpi->mi_ssim_rdmult_scaling_factors[index] = var;
log_sum += log(var);
}
@@ -4976,12 +5082,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
TX_SIZE t;
// SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
- // If in constrained layer drop mode (svc.framedrop_mode != LAYER_DROP) and
- // base spatial layer was dropped, no need to set svc.skip_enhancement_layer,
- // as whole superframe will be dropped.
+ // No need to set svc.skip_enhancement_layer if whole superframe will be
+ // dropped.
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
cpi->oxcf.target_bandwidth == 0 &&
!(cpi->svc.framedrop_mode != LAYER_DROP &&
+ (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP ||
+ cpi->svc
+ .force_drop_constrained_from_above[cpi->svc.number_spatial_layers -
+ 1]) &&
cpi->svc.drop_spatial_layer[0])) {
cpi->svc.skip_enhancement_layer = 1;
vp9_rc_postencode_update_drop_frame(cpi);
@@ -4989,17 +5098,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cpi->last_frame_dropped = 1;
cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
- if (cpi->svc.framedrop_mode == LAYER_DROP ||
- cpi->svc.drop_spatial_layer[0] == 0) {
- // For the case of constrained drop mode where the base is dropped
- // (drop_spatial_layer[0] == 1), which means full superframe dropped,
- // we don't increment the svc frame counters. In particular temporal
- // layer counter (which is incremented in vp9_inc_frame_in_layer())
- // won't be incremented, so on a dropped frame we try the same
- // temporal_layer_id on next incoming frame. This is to avoid an
- // issue with temporal alignement with full superframe dropping.
- vp9_inc_frame_in_layer(cpi);
- }
+ vp9_inc_frame_in_layer(cpi);
return;
}
@@ -5285,54 +5384,9 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
mismatch_move_frame_idx_w();
#endif
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
-
- vp9_twopass_postencode_update(cpi);
}
#endif // !CONFIG_REALTIME_ONLY
-static void init_ref_frame_bufs(VP9_COMMON *cm) {
- int i;
- BufferPool *const pool = cm->buffer_pool;
- cm->new_fb_idx = INVALID_IDX;
- for (i = 0; i < REF_FRAMES; ++i) {
- cm->ref_frame_map[i] = INVALID_IDX;
- }
- for (i = 0; i < FRAME_BUFFERS; ++i) {
- pool->frame_bufs[i].ref_count = 0;
- }
-}
-
-static void check_initial_width(VP9_COMP *cpi,
-#if CONFIG_VP9_HIGHBITDEPTH
- int use_highbitdepth,
-#endif
- int subsampling_x, int subsampling_y) {
- VP9_COMMON *const cm = &cpi->common;
-
- if (!cpi->initial_width ||
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth != use_highbitdepth ||
-#endif
- cm->subsampling_x != subsampling_x ||
- cm->subsampling_y != subsampling_y) {
- cm->subsampling_x = subsampling_x;
- cm->subsampling_y = subsampling_y;
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth = use_highbitdepth;
-#endif
-
- alloc_raw_frame_buffers(cpi);
- init_ref_frame_bufs(cm);
- alloc_util_frame_buffers(cpi);
-
- init_motion_estimation(cpi); // TODO(agrange) This can be removed.
-
- cpi->initial_width = cm->width;
- cpi->initial_height = cm->height;
- cpi->initial_mbs = cm->MBs;
- }
-}
-
int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time) {
@@ -5343,30 +5397,21 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
const int subsampling_y = sd->subsampling_y;
#if CONFIG_VP9_HIGHBITDEPTH
const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
-#endif
-
-#if CONFIG_VP9_HIGHBITDEPTH
- check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
#else
- check_initial_width(cpi, subsampling_x, subsampling_y);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-#if CONFIG_VP9_HIGHBITDEPTH
- // Disable denoiser for high bitdepth since vp9_denoiser_filter only works for
- // 8 bits.
- if (cm->bit_depth > 8) cpi->oxcf.noise_sensitivity = 0;
+ const int use_highbitdepth = 0;
#endif
+ update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
#if CONFIG_VP9_TEMPORAL_DENOISING
setup_denoiser_buffer(cpi);
#endif
+
+ alloc_raw_frame_buffers(cpi);
+
vpx_usec_timer_start(&timer);
if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
-#if CONFIG_VP9_HIGHBITDEPTH
- use_highbitdepth,
-#endif // CONFIG_VP9_HIGHBITDEPTH
- frame_flags))
+ use_highbitdepth, frame_flags))
res = -1;
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
@@ -5867,18 +5912,89 @@ static void init_tpl_stats(VP9_COMP *cpi) {
}
#if CONFIG_NON_GREEDY_MV
-static uint32_t motion_compensated_prediction(
- VP9_COMP *cpi, ThreadData *td, int frame_idx, uint8_t *cur_frame_buf,
- uint8_t *ref_frame_buf, int stride, BLOCK_SIZE bsize, int mi_row,
- int mi_col, MV *mv, int rf_idx) {
-#else // CONFIG_NON_GREEDY_MV
+static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
+ MotionField *motion_field,
+ int frame_idx, uint8_t *cur_frame_buf,
+ uint8_t *ref_frame_buf, int stride,
+ BLOCK_SIZE bsize, int mi_row,
+ int mi_col, MV *mv) {
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+ int step_param;
+ uint32_t bestsme = UINT_MAX;
+ const MvLimits tmp_mv_limits = x->mv_limits;
+ // lambda is used to adjust the importance of motion vector consistency.
+ // TODO(angiebird): Figure out lambda's proper value.
+ const int lambda = cpi->tpl_stats[frame_idx].lambda;
+ int_mv nb_full_mvs[NB_MVS_NUM];
+ int nb_full_mv_num;
+
+ MV best_ref_mv1 = { 0, 0 };
+ MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
+
+ best_ref_mv1_full.col = best_ref_mv1.col >> 3;
+ best_ref_mv1_full.row = best_ref_mv1.row >> 3;
+
+ // Setup frame pointers
+ x->plane[0].src.buf = cur_frame_buf;
+ x->plane[0].src.stride = stride;
+ xd->plane[0].pre[0].buf = ref_frame_buf;
+ xd->plane[0].pre[0].stride = stride;
+
+ step_param = mv_sf->reduce_first_step_size;
+ step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
+
+ vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
+
+ nb_full_mv_num =
+ vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
+ vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
+ lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
+
+ /* restore UMV window */
+ x->mv_limits = tmp_mv_limits;
+
+ return bestsme;
+}
+
+static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
+ uint8_t *cur_frame_buf,
+ uint8_t *ref_frame_buf, int stride,
+ BLOCK_SIZE bsize, MV *mv) {
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+ uint32_t bestsme = UINT_MAX;
+ uint32_t distortion;
+ uint32_t sse;
+ int cost_list[5];
+
+ MV best_ref_mv1 = { 0, 0 };
+
+ // Setup frame pointers
+ x->plane[0].src.buf = cur_frame_buf;
+ x->plane[0].src.stride = stride;
+ xd->plane[0].pre[0].buf = ref_frame_buf;
+ xd->plane[0].pre[0].stride = stride;
+
+ // TODO(yunqing): may use higher tap interp filter than 2 taps.
+ // Ignore mv costing by sending NULL pointer instead of cost array
+ bestsme = cpi->find_fractional_mv_step(
+ x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
+ USE_2_TAPS);
+
+ return bestsme;
+}
+
+#else // CONFIG_NON_GREEDY_MV
static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
- int frame_idx,
uint8_t *cur_frame_buf,
uint8_t *ref_frame_buf,
int stride, BLOCK_SIZE bsize,
- int mi_row, int mi_col, MV *mv) {
-#endif // CONFIG_NON_GREEDY_MV
+ MV *mv) {
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
@@ -5890,12 +6006,6 @@ static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
uint32_t sse;
int cost_list[5];
const MvLimits tmp_mv_limits = x->mv_limits;
-#if CONFIG_NON_GREEDY_MV
- // lambda is used to adjust the importance of motion vector consitency.
- // TODO(angiebird): Figure out lambda's proper value.
- const int lambda = cpi->tpl_stats[frame_idx].lambda;
- int_mv nb_full_mvs[NB_MVS_NUM];
-#endif
MV best_ref_mv1 = { 0, 0 };
MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
@@ -5914,21 +6024,9 @@ static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
-#if CONFIG_NON_GREEDY_MV
- (void)search_method;
- (void)sadpb;
- vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx,
- bsize, nb_full_mvs);
- vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1,
- &cpi->fn_ptr[bsize], nb_full_mvs, NB_MVS_NUM, mv);
-#else
- (void)frame_idx;
- (void)mi_row;
- (void)mi_col;
vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
search_method, sadpb, cond_cost_list(cpi, cost_list),
&best_ref_mv1, mv, 0, 0);
-#endif
/* restore UMV window */
x->mv_limits = tmp_mv_limits;
@@ -5943,6 +6041,7 @@ static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
return bestsme;
}
+#endif
static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
int ref_pos_col, int block, BLOCK_SIZE bsize) {
@@ -6224,19 +6323,22 @@ static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
set_mv_limits(cm, x, mi_row, mi_col);
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
int_mv mv;
+#if CONFIG_NON_GREEDY_MV
+ MotionField *motion_field;
+#endif
if (ref_frame[rf_idx] == NULL) continue;
#if CONFIG_NON_GREEDY_MV
(void)td;
- mv.as_int =
- get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_int;
+ motion_field = vp9_motion_field_info_get_motion_field(
+ &cpi->motion_field_info, frame_idx, rf_idx, bsize);
+ mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
#else
- motion_compensated_prediction(
- cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
- ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
- mi_row, mi_col, &mv.as_mv);
+ motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_buffer + mb_y_offset,
+ xd->cur_buf->y_stride, bsize, &mv.as_mv);
#endif
#if CONFIG_VP9_HIGHBITDEPTH
@@ -6378,8 +6480,9 @@ static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
}
static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
- TplDepFrame *tpl_frame, int rf_idx,
- BLOCK_SIZE bsize, int mi_row, int mi_col) {
+ MotionField *motion_field,
+ TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
+ int mi_row, int mi_col) {
int_mv mv;
switch (mv_mode) {
case ZERO_MV_MODE:
@@ -6387,7 +6490,7 @@ static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
mv.as_mv.col = 0;
break;
case NEW_MV_MODE:
- mv = *get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col);
+ mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
break;
case NEAREST_MV_MODE:
mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
@@ -6404,15 +6507,16 @@ static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
}
static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
- GF_PICTURE *gf_picture, int frame_idx,
- TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize,
- int mi_row, int mi_col, int_mv *mv) {
+ GF_PICTURE *gf_picture, MotionField *motion_field,
+ int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int_mv *mv) {
uint32_t sse;
struct buf_2d src;
struct buf_2d pre;
MV full_mv;
- *mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row,
- mi_col);
+ *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
+ mi_row, mi_col);
full_mv = get_full_mv(&mv->as_mv);
if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
&src, &pre)) {
@@ -6449,18 +6553,18 @@ static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
return mv_diff_cost;
}
-static double get_mv_cost(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
- int rf_idx, BLOCK_SIZE bsize, int mi_row,
+static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
+ TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
int mi_col) {
double mv_cost = get_mv_mode_cost(mv_mode);
if (mv_mode == NEW_MV_MODE) {
- MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize,
- mi_row, mi_col)
+ MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
+ bsize, mi_row, mi_col)
.as_mv;
- MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, tpl_frame, rf_idx,
- bsize, mi_row, mi_col)
+ MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
+ tpl_frame, bsize, mi_row, mi_col)
.as_mv;
- MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, tpl_frame, rf_idx,
+ MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
bsize, mi_row, mi_col)
.as_mv;
double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
@@ -6471,21 +6575,24 @@ static double get_mv_cost(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
}
static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
- GF_PICTURE *gf_picture, int frame_idx,
- TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize,
- int mi_row, int mi_col, int_mv *mv) {
+ GF_PICTURE *gf_picture, MotionField *motion_field,
+ int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int_mv *mv) {
MACROBLOCKD *xd = &x->e_mbd;
- double mv_dist = get_mv_dist(mv_mode, cpi, xd, gf_picture, frame_idx,
- tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
+ double mv_dist =
+ get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
+ tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
double mv_cost =
- get_mv_cost(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row, mi_col);
+ get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
double mult = 180;
return mv_cost + mult * log2f(1 + mv_dist);
}
static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
- GF_PICTURE *gf_picture, int frame_idx,
+ GF_PICTURE *gf_picture,
+ MotionField *motion_field, int frame_idx,
TplDepFrame *tpl_frame, int rf_idx,
BLOCK_SIZE bsize, int mi_row, int mi_col,
double *rd, int_mv *mv) {
@@ -6499,8 +6606,8 @@ static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (mv_mode == NEW_MV_MODE) {
continue;
}
- this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, frame_idx, tpl_frame,
- rf_idx, bsize, mi_row, mi_col, &this_mv);
+ this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
+ tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
if (update == 0) {
*rd = this_rd;
*mv = this_mv;
@@ -6518,8 +6625,8 @@ static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
- GF_PICTURE *gf_picture, int frame_idx,
- TplDepFrame *tpl_frame, int rf_idx,
+ GF_PICTURE *gf_picture, MotionField *motion_field,
+ int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
BLOCK_SIZE bsize, int mi_row, int mi_col) {
const int mi_height = num_8x8_blocks_high_lookup[bsize];
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
@@ -6549,9 +6656,9 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
double this_rd;
int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
- mv_mode_arr[nb_row * stride + nb_col] =
- find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame,
- rf_idx, bsize, nb_row, nb_col, &this_rd, mv);
+ mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
+ cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
+ bsize, nb_row, nb_col, &this_rd, mv);
if (r == 0 && c == 0) {
this_no_new_mv_rd = this_rd;
}
@@ -6565,9 +6672,9 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
// new mv
mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
- this_new_mv_rd = eval_mv_mode(NEW_MV_MODE, cpi, x, gf_picture, frame_idx,
- tpl_frame, rf_idx, bsize, mi_row, mi_col,
- &select_mv_arr[mi_row * stride + mi_col]);
+ this_new_mv_rd = eval_mv_mode(
+ NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
+ rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
new_mv_rd = this_new_mv_rd;
// We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
// beforehand.
@@ -6580,9 +6687,9 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
double this_rd;
int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
- mv_mode_arr[nb_row * stride + nb_col] =
- find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame,
- rf_idx, bsize, nb_row, nb_col, &this_rd, mv);
+ mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
+ cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
+ bsize, nb_row, nb_col, &this_rd, mv);
new_mv_rd += this_rd;
}
}
@@ -6612,7 +6719,8 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
- GF_PICTURE *gf_picture, int frame_idx,
+ GF_PICTURE *gf_picture,
+ MotionField *motion_field, int frame_idx,
TplDepFrame *tpl_frame, int rf_idx,
BLOCK_SIZE bsize) {
const int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -6631,160 +6739,40 @@ static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
assert(c >= 0 && c < unit_cols);
assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
- predict_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, rf_idx, bsize,
- mi_row, mi_col);
+ predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
+ rf_idx, bsize, mi_row, mi_col);
}
}
}
-static double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows,
- int cols) {
- double IxIx = 0;
- double IxIy = 0;
- double IyIy = 0;
- double score;
- int r, c;
- vpx_clear_system_state();
- for (r = 0; r + 1 < rows; ++r) {
- for (c = 0; c + 1 < cols; ++c) {
- int diff_x = buf[r * stride + c] - buf[r * stride + c + 1];
- int diff_y = buf[r * stride + c] - buf[(r + 1) * stride + c];
- IxIx += diff_x * diff_x;
- IxIy += diff_x * diff_y;
- IyIy += diff_y * diff_y;
- }
- }
- IxIx /= (rows - 1) * (cols - 1);
- IxIy /= (rows - 1) * (cols - 1);
- IyIy /= (rows - 1) * (cols - 1);
- score = (IxIx * IyIy - IxIy * IxIy + 0.0001) / (IxIx + IyIy + 0.0001);
- return score;
-}
-
-static int compare_feature_score(const void *a, const void *b) {
- const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a;
- const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b;
- if (aa->feature_score < bb->feature_score) {
- return 1;
- } else if (aa->feature_score > bb->feature_score) {
- return -1;
- } else {
- return 0;
- }
-}
-
-static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx,
- YV12_BUFFER_CONFIG **ref_frame, BLOCK_SIZE bsize,
+static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
+ MotionField *motion_field, int frame_idx,
+ YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
int mi_row, int mi_col) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCK *x = &td->mb;
MACROBLOCKD *xd = &x->e_mbd;
- TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
- TplDepStats *tpl_stats =
- &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
const int mb_y_offset =
mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
- int rf_idx;
-
+ assert(ref_frame != NULL);
set_mv_limits(cm, x, mi_row, mi_col);
-
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
- int_mv *mv = get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col);
- if (ref_frame[rf_idx] == NULL) {
- tpl_stats->ready[rf_idx] = 0;
- continue;
- } else {
- tpl_stats->ready[rf_idx] = 1;
- }
- motion_compensated_prediction(
- cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
- ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
- mi_row, mi_col, &mv->as_mv, rf_idx);
- }
-}
-
-#define CHANGE_MV_SEARCH_ORDER 1
-#define USE_PQSORT 1
-
-#if CHANGE_MV_SEARCH_ORDER
-#if USE_PQSORT
-static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size,
- FEATURE_SCORE_LOC **output) {
- if (*size > 0) {
- *output = heap[0];
- --*size;
- if (*size > 0) {
- int p, l, r;
- heap[0] = heap[*size];
- p = 0;
- l = 2 * p + 1;
- r = 2 * p + 2;
- while (l < *size) {
- FEATURE_SCORE_LOC *tmp;
- int c = l;
- if (r < *size && heap[r]->feature_score > heap[l]->feature_score) {
- c = r;
- }
- if (heap[p]->feature_score >= heap[c]->feature_score) {
- break;
- }
- tmp = heap[p];
- heap[p] = heap[c];
- heap[c] = tmp;
- p = c;
- l = 2 * p + 1;
- r = 2 * p + 2;
- }
- }
- } else {
- assert(0);
- }
-}
-
-static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size,
- FEATURE_SCORE_LOC *input) {
- int c, p;
- FEATURE_SCORE_LOC *tmp;
- input->visited = 1;
- heap[*size] = input;
- ++*size;
- c = *size - 1;
- p = c >> 1;
- while (c > 0 && heap[c]->feature_score > heap[p]->feature_score) {
- tmp = heap[p];
- heap[p] = heap[c];
- heap[c] = tmp;
- c = p;
- p >>= 1;
- }
-}
-
-static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame,
- BLOCK_SIZE bsize, int mi_row, int mi_col,
- int *heap_size) {
- const int mi_unit = num_8x8_blocks_wide_lookup[bsize];
- const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
- int i;
- for (i = 0; i < NB_MVS_NUM; ++i) {
- int r = dirs[i][0] * mi_unit;
- int c = dirs[i][1] * mi_unit;
- if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
- mi_col + c < tpl_frame->mi_cols) {
- FEATURE_SCORE_LOC *fs_loc =
- &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride +
- (mi_col + c)];
- if (fs_loc->visited == 0) {
- max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc);
- }
- }
- }
-}
-#endif // USE_PQSORT
-#endif // CHANGE_MV_SEARCH_ORDER
-
-static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx,
- YV12_BUFFER_CONFIG *ref_frame[3],
- BLOCK_SIZE bsize) {
+ {
+ int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
+ uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
+ uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
+ const int stride = xd->cur_buf->y_stride;
+ full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
+ ref_frame_buf, stride, bsize, mi_row, mi_col,
+ &mv.as_mv);
+ sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
+ bsize, &mv.as_mv);
+ vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
+ }
+}
+
+static void build_motion_field(
+ VP9_COMP *cpi, int frame_idx,
+ YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
VP9_COMMON *cm = &cpi->common;
ThreadData *td = &cpi->td;
TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
@@ -6792,79 +6780,26 @@ static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx,
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
- int fs_loc_sort_size;
- int fs_loc_heap_size;
int mi_row, mi_col;
+ int rf_idx;
tpl_frame->lambda = (pw * ph) >> 2;
assert(pw * ph == tpl_frame->lambda << 2);
- fs_loc_sort_size = 0;
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
- const int mb_y_offset =
- mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
- const int bw = 4 << b_width_log2_lookup[bsize];
- const int bh = 4 << b_height_log2_lookup[bsize];
- TplDepStats *tpl_stats =
- &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
- FEATURE_SCORE_LOC *fs_loc =
- &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col];
- tpl_stats->feature_score = get_feature_score(
- xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh);
- fs_loc->visited = 0;
- fs_loc->feature_score = tpl_stats->feature_score;
- fs_loc->mi_row = mi_row;
- fs_loc->mi_col = mi_col;
- cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc;
- ++fs_loc_sort_size;
- }
- }
-
- qsort(cpi->feature_score_loc_sort, fs_loc_sort_size,
- sizeof(*cpi->feature_score_loc_sort), compare_feature_score);
-
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
- int rf_idx;
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
- TplDepStats *tpl_stats =
- &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
- tpl_stats->ready[rf_idx] = 0;
- }
+ for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
+ MotionField *motion_field = vp9_motion_field_info_get_motion_field(
+ &cpi->motion_field_info, frame_idx, rf_idx, bsize);
+ if (ref_frame[rf_idx] == NULL) {
+ continue;
}
- }
-
-#if CHANGE_MV_SEARCH_ORDER
-#if !USE_PQSORT
- for (i = 0; i < fs_loc_sort_size; ++i) {
- FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i];
- do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
- fs_loc->mi_col);
- }
-#else // !USE_PQSORT
- fs_loc_heap_size = 0;
- max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size,
- cpi->feature_score_loc_sort[0]);
-
- while (fs_loc_heap_size > 0) {
- FEATURE_SCORE_LOC *fs_loc;
- max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc);
-
- do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
- fs_loc->mi_col);
-
- add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col,
- &fs_loc_heap_size);
- }
-#endif // !USE_PQSORT
-#else // CHANGE_MV_SEARCH_ORDER
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
- do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col);
+ vp9_motion_field_reset_mvs(motion_field);
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
+ bsize, mi_row, mi_col);
+ }
}
}
-#endif // CHANGE_MV_SEARCH_ORDER
}
#endif // CONFIG_NON_GREEDY_MV
@@ -6872,7 +6807,7 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
int frame_idx, BLOCK_SIZE bsize) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
- YV12_BUFFER_CONFIG *ref_frame[3] = { NULL, NULL, NULL };
+ YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
VP9_COMMON *cm = &cpi->common;
struct scale_factors sf;
@@ -6922,7 +6857,7 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
// Prepare reference frame pointers. If any reference frame slot is
// unavailable, the pointer will be set to Null.
- for (idx = 0; idx < 3; ++idx) {
+ for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
int rf_idx = gf_picture[frame_idx].ref_frame[idx];
if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
}
@@ -6945,13 +6880,15 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
++square_block_idx) {
BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
- build_motion_field(cpi, xd, frame_idx, ref_frame, square_bsize);
+ build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
}
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
if (ref_frame_idx != -1) {
- predict_mv_mode_arr(cpi, x, gf_picture, frame_idx, tpl_frame, rf_idx,
- bsize);
+ MotionField *motion_field = vp9_motion_field_info_get_motion_field(
+ &cpi->motion_field_info, frame_idx, rf_idx, bsize);
+ predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
+ tpl_frame, rf_idx, bsize);
}
}
#endif
@@ -7001,7 +6938,7 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
const VP9_COMMON *cm = &cpi->common;
int rf_idx;
for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
int mi_row, mi_col;
int ref_frame_idx;
@@ -7022,8 +6959,9 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
- int_mv mv =
- *get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col);
+ int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
+ frame_idx, rf_idx, bsize,
+ mi_row, mi_col);
printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
mv.as_mv.col);
}
@@ -7067,26 +7005,8 @@ static void init_tpl_buffer(VP9_COMP *cpi) {
const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
#if CONFIG_NON_GREEDY_MV
- int sqr_bsize;
int rf_idx;
- // TODO(angiebird): This probably needs further modifications to support
- // frame scaling later on.
- if (cpi->feature_score_loc_alloc == 0) {
- // The smallest block size of motion field is 4x4, but the mi_unit is 8x8,
- // therefore the number of units is "mi_rows * mi_cols * 4" here.
- CHECK_MEM_ERROR(
- cm, cpi->feature_score_loc_arr,
- vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->feature_score_loc_arr)));
- CHECK_MEM_ERROR(cm, cpi->feature_score_loc_sort,
- vpx_calloc(mi_rows * mi_cols * 4,
- sizeof(*cpi->feature_score_loc_sort)));
- CHECK_MEM_ERROR(cm, cpi->feature_score_loc_heap,
- vpx_calloc(mi_rows * mi_cols * 4,
- sizeof(*cpi->feature_score_loc_heap)));
-
- cpi->feature_score_loc_alloc = 1;
- }
vpx_free(cpi->select_mv_arr);
CHECK_MEM_ERROR(
cm, cpi->select_mv_arr,
@@ -7101,16 +7021,7 @@ static void init_tpl_buffer(VP9_COMP *cpi) {
continue;
#if CONFIG_NON_GREEDY_MV
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
- for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
- vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]);
- CHECK_MEM_ERROR(
- cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],
- vpx_calloc(
- mi_rows * mi_cols * 4,
- sizeof(
- *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize])));
- }
+ for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
CHECK_MEM_ERROR(
cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
@@ -7141,6 +7052,25 @@ static void init_tpl_buffer(VP9_COMP *cpi) {
}
}
+static void free_tpl_buffer(VP9_COMP *cpi) {
+ int frame;
+#if CONFIG_NON_GREEDY_MV
+ vp9_free_motion_field_info(&cpi->motion_field_info);
+ vpx_free(cpi->select_mv_arr);
+#endif
+ for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
+#if CONFIG_NON_GREEDY_MV
+ int rf_idx;
+ for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
+ vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
+ vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
+ }
+#endif
+ vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
+ cpi->tpl_stats[frame].is_valid = 0;
+ }
+}
+
static void setup_tpl_stats(VP9_COMP *cpi) {
GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
const GF_GROUP *gf_group = &cpi->twopass.gf_group;
@@ -7165,9 +7095,39 @@ static void setup_tpl_stats(VP9_COMP *cpi) {
#endif // CONFIG_NON_GREEDY_MV
}
+static void init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
+ encode_frame_result->show_idx = -1; // -1 means no actual encoding has happened yet.
+}
+
+#if !CONFIG_REALTIME_ONLY
+static void update_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result,
+ int show_idx,
+ FRAME_UPDATE_TYPE update_type,
+ const YV12_BUFFER_CONFIG *source_frame,
+ const YV12_BUFFER_CONFIG *coded_frame,
+ int quantize_index, uint32_t bit_depth,
+ uint32_t input_bit_depth) {
+ PSNR_STATS psnr;
+#if CONFIG_VP9_HIGHBITDEPTH
+ vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth,
+ input_bit_depth);
+#else
+ (void)bit_depth;
+ (void)input_bit_depth;
+ vpx_calc_psnr(source_frame, coded_frame, &psnr);
+#endif
+ encode_frame_result->psnr = psnr.psnr[0];
+ encode_frame_result->sse = psnr.sse[0];
+ encode_frame_result->show_idx = show_idx;
+ encode_frame_result->update_type = update_type;
+ encode_frame_result->quantize_index = quantize_index;
+}
+#endif // !CONFIG_REALTIME_ONLY
+
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest, int64_t *time_stamp,
- int64_t *time_end, int flush) {
+ int64_t *time_end, int flush,
+ ENCODE_FRAME_RESULT *encode_frame_result) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
VP9_COMMON *const cm = &cpi->common;
BufferPool *const pool = cm->buffer_pool;
@@ -7179,6 +7139,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
int arf_src_index;
const int gf_group_index = cpi->twopass.gf_group.index;
int i;
+ init_encode_frame_result(encode_frame_result);
if (is_one_pass_cbr_svc(cpi)) {
vp9_one_pass_cbr_svc_start_layer(cpi);
@@ -7284,10 +7245,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (source != NULL) {
cm->show_frame = 1;
cm->intra_only = 0;
- // if the flags indicate intra frame, but if the current picture is for
- // non-zero spatial layer, it should not be an intra picture.
+ // If the flags indicate intra frame, but the current picture is for
+ // spatial layer above first_spatial_layer_to_encode, it should not be an
+ // intra picture.
if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
- cpi->svc.spatial_layer_id > 0) {
+ cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
}
@@ -7313,12 +7275,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
*frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
} else {
*size = 0;
-#if !CONFIG_REALTIME_ONLY
- if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
- vp9_end_first_pass(cpi); /* get last stats packet */
- cpi->twopass.first_pass_done = 1;
- }
-#endif // !CONFIG_REALTIME_ONLY
return -1;
}
@@ -7389,6 +7345,19 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cpi->kmeans_data_arr_alloc = 1;
}
+#if CONFIG_NON_GREEDY_MV
+ {
+ const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+ const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+ Status status = vp9_alloc_motion_field_info(
+ &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols);
+ if (status == STATUS_FAILED) {
+ vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,
+ "vp9_alloc_motion_field_info failed");
+ }
+ }
+#endif // CONFIG_NON_GREEDY_MV
+
if (gf_group_index == 1 &&
cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
cpi->sf.enable_tpl_model) {
@@ -7432,6 +7401,25 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
vp9_first_pass(cpi, source);
} else if (oxcf->pass == 2 && !cpi->use_svc) {
Pass2Encode(cpi, size, dest, frame_flags);
+ // update_encode_frame_result() depends on twopass.gf_group.index,
+ // cm->new_fb_idx, and cpi->Source being properly updated for the current
+ // frame and not yet updated for the next frame.
+ // The update locations are as follows.
+ // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
+ // for the first frame in the gf_group and is updated for the next frame at
+ // vp9_twopass_postencode_update().
+ // 2) cpi->Source is updated at the beginning of this function, i.e.
+ // vp9_get_compressed_data()
+ // 3) cm->new_fb_idx is updated at the beginning of this function by
+ // get_free_fb(cm)
+ // TODO(angiebird): Improve the codebase to make the update of frame
+ // dependent variables more robust.
+ update_encode_frame_result(
+ encode_frame_result, source->show_idx,
+ cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
+ cpi->Source, get_frame_new_buffer(cm), vp9_get_quantizer(cpi),
+ cm->bit_depth, cpi->oxcf.input_bit_depth);
+ vp9_twopass_postencode_update(cpi);
} else if (cpi->use_svc) {
SvcEncode(cpi, size, dest, frame_flags);
} else {
@@ -7464,9 +7452,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
vpx_usec_timer_mark(&cmptimer);
cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
- // Should we calculate metrics for the frame.
- if (is_psnr_calc_enabled(cpi)) generate_psnr_packet(cpi);
-
if (cpi->keep_level_stats && oxcf->pass != 1)
update_level_info(cpi, size, arf_src_index);
@@ -7703,15 +7688,15 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
unsigned int height) {
VP9_COMMON *cm = &cpi->common;
#if CONFIG_VP9_HIGHBITDEPTH
- check_initial_width(cpi, cm->use_highbitdepth, 1, 1);
+ update_initial_width(cpi, cm->use_highbitdepth, 1, 1);
#else
- check_initial_width(cpi, 1, 1);
+ update_initial_width(cpi, 0, 1, 1);
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_TEMPORAL_DENOISING
setup_denoiser_buffer(cpi);
#endif
-
+ alloc_raw_frame_buffers(cpi);
if (width) {
cm->width = width;
if (cm->width > cpi->initial_width) {
@@ -7740,7 +7725,7 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
return;
}
-int vp9_get_quantizer(VP9_COMP *cpi) { return cpi->common.base_qindex; }
+int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; }
void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
if (flags &
diff --git a/libvpx/vp9/encoder/vp9_encoder.h b/libvpx/vp9/encoder/vp9_encoder.h
index f157fdfc5..0a8623efb 100644
--- a/libvpx/vp9/encoder/vp9_encoder.h
+++ b/libvpx/vp9/encoder/vp9_encoder.h
@@ -20,8 +20,10 @@
#include "vpx_dsp/ssim.h"
#endif
#include "vpx_dsp/variance.h"
+#include "vpx_dsp/psnr.h"
#include "vpx_ports/system_state.h"
#include "vpx_util/vpx_thread.h"
+#include "vpx_util/vpx_timestamp.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_ppflags.h"
@@ -152,7 +154,10 @@ typedef struct VP9EncoderConfig {
int height; // height of data passed to the compressor
unsigned int input_bit_depth; // Input bit depth.
double init_framerate; // set to passed in framerate
- int64_t target_bandwidth; // bandwidth to be used in bits per second
+ vpx_rational_t g_timebase; // equivalent to g_timebase in vpx_codec_enc_cfg_t
+ vpx_rational64_t g_timebase_in_ts; // g_timebase * TICKS_PER_SEC
+
+ int64_t target_bandwidth; // bandwidth to be used in bits per second
int noise_sensitivity; // pre processing blur: recommendation 0
int sharpness; // sharpening output: recommendation 0:
@@ -259,7 +264,6 @@ typedef struct VP9EncoderConfig {
unsigned int target_level;
vpx_fixed_buf_t two_pass_stats_in;
- struct vpx_codec_pkt_list *output_pkt_list;
#if CONFIG_FP_MB_STATS
vpx_fixed_buf_t firstpass_mb_stats_in;
@@ -293,16 +297,9 @@ typedef struct TplDepStats {
int ref_frame_index;
int_mv mv;
-
-#if CONFIG_NON_GREEDY_MV
- int ready[3];
- int64_t sse_arr[3];
- double feature_score;
-#endif
} TplDepStats;
#if CONFIG_NON_GREEDY_MV
-#define SQUARE_BLOCK_SIZES 4
#define ZERO_MV_MODE 0
#define NEW_MV_MODE 1
@@ -322,54 +319,11 @@ typedef struct TplDepFrame {
int base_qindex;
#if CONFIG_NON_GREEDY_MV
int lambda;
- int_mv *pyramid_mv_arr[3][SQUARE_BLOCK_SIZES];
int *mv_mode_arr[3];
double *rd_diff_arr[3];
#endif
} TplDepFrame;
-#if CONFIG_NON_GREEDY_MV
-static INLINE int get_square_block_idx(BLOCK_SIZE bsize) {
- if (bsize == BLOCK_4X4) {
- return 0;
- }
- if (bsize == BLOCK_8X8) {
- return 1;
- }
- if (bsize == BLOCK_16X16) {
- return 2;
- }
- if (bsize == BLOCK_32X32) {
- return 3;
- }
- assert(0 && "ERROR: non-square block size");
- return -1;
-}
-
-static INLINE BLOCK_SIZE square_block_idx_to_bsize(int square_block_idx) {
- if (square_block_idx == 0) {
- return BLOCK_4X4;
- }
- if (square_block_idx == 1) {
- return BLOCK_8X8;
- }
- if (square_block_idx == 2) {
- return BLOCK_16X16;
- }
- if (square_block_idx == 3) {
- return BLOCK_32X32;
- }
- assert(0 && "ERROR: invalid square_block_idx");
- return BLOCK_INVALID;
-}
-
-static INLINE int_mv *get_pyramid_mv(const TplDepFrame *tpl_frame, int rf_idx,
- BLOCK_SIZE bsize, int mi_row, int mi_col) {
- return &tpl_frame->pyramid_mv_arr[rf_idx][get_square_block_idx(bsize)]
- [mi_row * tpl_frame->stride + mi_col];
-}
-#endif
-
#define TPL_DEP_COST_SCALE_LOG2 4
// TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
@@ -533,7 +487,6 @@ typedef enum {
typedef struct {
int8_t level_index;
- uint8_t rc_config_updated;
uint8_t fail_flag;
int max_frame_size; // in bits
double max_cpb_size; // in bits
@@ -555,15 +508,6 @@ typedef struct EncFrameBuf {
// Maximum operating frame buffer size needed for a GOP using ARF reference.
#define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS)
-#if CONFIG_NON_GREEDY_MV
-typedef struct FEATURE_SCORE_LOC {
- int visited;
- double feature_score;
- int mi_row;
- int mi_col;
-} FEATURE_SCORE_LOC;
-#endif
-
#define MAX_KMEANS_GROUPS 8
typedef struct KMEANS_DATA {
@@ -572,7 +516,33 @@ typedef struct KMEANS_DATA {
int group_idx;
} KMEANS_DATA;
+#if CONFIG_RATE_CTRL
+typedef struct ENCODE_COMMAND {
+ int use_external_quantize_index;
+ int external_quantize_index;
+} ENCODE_COMMAND;
+
+static INLINE void encode_command_init(ENCODE_COMMAND *encode_command) {
+ vp9_zero(*encode_command);
+ encode_command->use_external_quantize_index = 0;
+ encode_command->external_quantize_index = -1;
+}
+
+static INLINE void encode_command_set_external_quantize_index(
+ ENCODE_COMMAND *encode_command, int quantize_index) {
+ encode_command->use_external_quantize_index = 1;
+ encode_command->external_quantize_index = quantize_index;
+}
+
+static INLINE void encode_command_reset_external_quantize_index(
+ ENCODE_COMMAND *encode_command) {
+ encode_command->use_external_quantize_index = 0;
+ encode_command->external_quantize_index = -1;
+}
+#endif // CONFIG_RATE_CTRL
+
typedef struct VP9_COMP {
+ FRAME_INFO frame_info;
QUANTS quants;
ThreadData td;
MB_MODE_INFO_EXT *mbmi_ext_base;
@@ -611,11 +581,8 @@ typedef struct VP9_COMP {
int kmeans_count_ls[MAX_KMEANS_GROUPS];
int kmeans_ctr_num;
#if CONFIG_NON_GREEDY_MV
+ MotionFieldInfo motion_field_info;
int tpl_ready;
- int feature_score_loc_alloc;
- FEATURE_SCORE_LOC *feature_score_loc_arr;
- FEATURE_SCORE_LOC **feature_score_loc_sort;
- FEATURE_SCORE_LOC **feature_score_loc_heap;
int_mv *select_mv_arr;
#endif
@@ -878,11 +845,23 @@ typedef struct VP9_COMP {
int multi_layer_arf;
vpx_roi_map_t roi;
+#if CONFIG_RATE_CTRL
+ ENCODE_COMMAND encode_command;
+#endif
} VP9_COMP;
+typedef struct ENCODE_FRAME_RESULT {
+ int show_idx;
+ FRAME_UPDATE_TYPE update_type;
+ double psnr;
+ uint64_t sse;
+ int quantize_index;
+} ENCODE_FRAME_RESULT;
+
void vp9_initialize_enc(void);
-struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
+void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt);
+struct VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
BufferPool *const pool);
void vp9_remove_compressor(VP9_COMP *cpi);
@@ -896,7 +875,8 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest, int64_t *time_stamp,
- int64_t *time_end, int flush);
+ int64_t *time_end, int flush,
+ ENCODE_FRAME_RESULT *encode_frame_result);
int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
vp9_ppflags_t *flags);
@@ -948,7 +928,7 @@ static INLINE void stack_init(int *stack, int length) {
for (idx = 0; idx < length; ++idx) stack[idx] = -1;
}
-int vp9_get_quantizer(struct VP9_COMP *cpi);
+int vp9_get_quantizer(const VP9_COMP *cpi);
static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) {
return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
@@ -1121,6 +1101,8 @@ void vp9_new_framerate(VP9_COMP *cpi, double framerate);
void vp9_set_row_mt(VP9_COMP *cpi);
+int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr);
+
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
#ifdef __cplusplus
diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c
index e0acf563b..57ab583cf 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/libvpx/vp9/encoder/vp9_firstpass.c
@@ -84,14 +84,8 @@ static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) {
return 1;
}
-static void output_stats(FIRSTPASS_STATS *stats,
- struct vpx_codec_pkt_list *pktlist) {
- struct vpx_codec_cx_pkt pkt;
- pkt.kind = VPX_CODEC_STATS_PKT;
- pkt.data.twopass_stats.buf = stats;
- pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
- vpx_codec_pkt_list_add(pktlist, &pkt);
-
+static void output_stats(FIRSTPASS_STATS *stats) {
+ (void)stats;
// TEMP debug code
#if OUTPUT_FPF
{
@@ -220,14 +214,14 @@ static void subtract_stats(FIRSTPASS_STATS *section,
// bars and partially discounts other 0 energy areas.
#define MIN_ACTIVE_AREA 0.5
#define MAX_ACTIVE_AREA 1.0
-static double calculate_active_area(const VP9_COMP *cpi,
+static double calculate_active_area(const FRAME_INFO *frame_info,
const FIRSTPASS_STATS *this_frame) {
double active_pct;
active_pct =
1.0 -
((this_frame->intra_skip_pct / 2) +
- ((this_frame->inactive_zone_rows * 2) / (double)cpi->common.mb_rows));
+ ((this_frame->inactive_zone_rows * 2) / (double)frame_info->mb_rows));
return fclamp(active_pct, MIN_ACTIVE_AREA, MAX_ACTIVE_AREA);
}
@@ -260,17 +254,16 @@ static double calculate_mod_frame_score(const VP9_COMP *cpi,
// remaining active MBs. The correction here assumes that coding
// 0.5N blocks of complexity 2X is a little easier than coding N
// blocks of complexity X.
- modified_score *=
- pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION);
+ modified_score *= pow(calculate_active_area(&cpi->frame_info, this_frame),
+ ACT_AREA_CORRECTION);
return modified_score;
}
-static double calculate_norm_frame_score(const VP9_COMP *cpi,
- const TWO_PASS *twopass,
- const VP9EncoderConfig *oxcf,
- const FIRSTPASS_STATS *this_frame,
- const double av_err) {
+static double calc_norm_frame_score(const VP9EncoderConfig *oxcf,
+ const FRAME_INFO *frame_info,
+ const FIRSTPASS_STATS *this_frame,
+ double mean_mod_score, double av_err) {
double modified_score =
av_err * pow(this_frame->coded_error * this_frame->weight /
DOUBLE_DIVIDE_CHECK(av_err),
@@ -285,14 +278,22 @@ static double calculate_norm_frame_score(const VP9_COMP *cpi,
// 0.5N blocks of complexity 2X is a little easier than coding N
// blocks of complexity X.
modified_score *=
- pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION);
+ pow(calculate_active_area(frame_info, this_frame), ACT_AREA_CORRECTION);
// Normalize to a midpoint score.
- modified_score /= DOUBLE_DIVIDE_CHECK(twopass->mean_mod_score);
-
+ modified_score /= DOUBLE_DIVIDE_CHECK(mean_mod_score);
return fclamp(modified_score, min_score, max_score);
}
+static double calculate_norm_frame_score(const VP9_COMP *cpi,
+ const TWO_PASS *twopass,
+ const VP9EncoderConfig *oxcf,
+ const FIRSTPASS_STATS *this_frame,
+ const double av_err) {
+ return calc_norm_frame_score(oxcf, &cpi->frame_info, this_frame,
+ twopass->mean_mod_score, av_err);
+}
+
// This function returns the maximum target rate per frame.
static int frame_max_bits(const RATE_CONTROL *rc,
const VP9EncoderConfig *oxcf) {
@@ -312,7 +313,8 @@ void vp9_init_first_pass(VP9_COMP *cpi) {
}
void vp9_end_first_pass(VP9_COMP *cpi) {
- output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
+ output_stats(&cpi->twopass.total_stats);
+ cpi->twopass.first_pass_done = 1;
vpx_free(cpi->twopass.fp_mb_float_stats);
cpi->twopass.fp_mb_float_stats = NULL;
}
@@ -1421,7 +1423,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
// Don't want to do output stats with a stack variable!
twopass->this_frame_stats = fps;
- output_stats(&twopass->this_frame_stats, cpi->output_pkt_list);
+ output_stats(&twopass->this_frame_stats);
accumulate_stats(&twopass->total_stats, &fps);
#if CONFIG_FP_MB_STATS
@@ -1747,15 +1749,16 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
#define LOW_CODED_ERR_PER_MB 10.0
#define NCOUNT_FRAME_II_THRESH 6.0
-static double get_sr_decay_rate(const VP9_COMP *cpi,
+static double get_sr_decay_rate(const FRAME_INFO *frame_info,
const FIRSTPASS_STATS *frame) {
double sr_diff = (frame->sr_coded_error - frame->coded_error);
double sr_decay = 1.0;
double modified_pct_inter;
double modified_pcnt_intra;
const double motion_amplitude_part =
- frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) /
- (cpi->initial_height + cpi->initial_width));
+ frame->pcnt_motion *
+ ((frame->mvc_abs + frame->mvr_abs) /
+ (frame_info->frame_height + frame_info->frame_width));
modified_pct_inter = frame->pcnt_inter;
if ((frame->coded_error > LOW_CODED_ERR_PER_MB) &&
@@ -1776,74 +1779,73 @@ static double get_sr_decay_rate(const VP9_COMP *cpi,
// This function gives an estimate of how badly we believe the prediction
// quality is decaying from frame to frame.
-static double get_zero_motion_factor(const VP9_COMP *cpi,
- const FIRSTPASS_STATS *frame) {
- const double zero_motion_pct = frame->pcnt_inter - frame->pcnt_motion;
- double sr_decay = get_sr_decay_rate(cpi, frame);
+static double get_zero_motion_factor(const FRAME_INFO *frame_info,
+ const FIRSTPASS_STATS *frame_stats) {
+ const double zero_motion_pct =
+ frame_stats->pcnt_inter - frame_stats->pcnt_motion;
+ double sr_decay = get_sr_decay_rate(frame_info, frame_stats);
return VPXMIN(sr_decay, zero_motion_pct);
}
#define ZM_POWER_FACTOR 0.75
-static double get_prediction_decay_rate(const VP9_COMP *cpi,
- const FIRSTPASS_STATS *next_frame) {
- const double sr_decay_rate = get_sr_decay_rate(cpi, next_frame);
+static double get_prediction_decay_rate(const FRAME_INFO *frame_info,
+ const FIRSTPASS_STATS *frame_stats) {
+ const double sr_decay_rate = get_sr_decay_rate(frame_info, frame_stats);
const double zero_motion_factor =
- (0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion),
+ (0.95 * pow((frame_stats->pcnt_inter - frame_stats->pcnt_motion),
ZM_POWER_FACTOR));
return VPXMAX(zero_motion_factor,
(sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));
}
+static int get_show_idx(const TWO_PASS *twopass) {
+ return (int)(twopass->stats_in - twopass->stats_in_start);
+}
// Function to test for a condition where a complex transition is followed
// by a static section. For example in slide shows where there is a fade
// between slides. This is to help with more optimal kf and gf positioning.
-static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval,
- int still_interval,
- double loop_decay_rate,
- double last_decay_rate) {
- TWO_PASS *const twopass = &cpi->twopass;
- RATE_CONTROL *const rc = &cpi->rc;
-
- // Break clause to detect very still sections after motion
- // For example a static image after a fade or other transition
- // instead of a clean scene cut.
- if (frame_interval > rc->min_gf_interval && loop_decay_rate >= 0.999 &&
- last_decay_rate < 0.9) {
- int j;
-
- // Look ahead a few frames to see if static condition persists...
- for (j = 0; j < still_interval; ++j) {
- const FIRSTPASS_STATS *stats = &twopass->stats_in[j];
- if (stats >= twopass->stats_in_end) break;
-
- if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break;
- }
+static int check_transition_to_still(const FIRST_PASS_INFO *first_pass_info,
+ int show_idx, int still_interval) {
+ int j;
+ int num_frames = fps_get_num_frames(first_pass_info);
+ if (show_idx + still_interval > num_frames) {
+ return 0;
+ }
- // Only if it does do we signal a transition to still.
- return j == still_interval;
+ // Look ahead a few frames to see if static condition persists...
+ for (j = 0; j < still_interval; ++j) {
+ const FIRSTPASS_STATS *stats =
+ fps_get_frame_stats(first_pass_info, show_idx + j);
+ if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break;
}
- return 0;
+ // Only if it does do we signal a transition to still.
+ return j == still_interval;
}
// This function detects a flash through the high relative pcnt_second_ref
// score in the frame following a flash frame. The offset passed in should
// reflect this.
-static int detect_flash(const TWO_PASS *twopass, int offset) {
- const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset);
-
+static int detect_flash_from_frame_stats(const FIRSTPASS_STATS *frame_stats) {
// What we are looking for here is a situation where there is a
// brief break in prediction (such as a flash) but subsequent frames
// are reasonably well predicted by an earlier (pre flash) frame.
// The recovery after a flash is indicated by a high pcnt_second_ref
// useage or a second ref coded error notabley lower than the last
// frame coded error.
- return next_frame != NULL &&
- ((next_frame->sr_coded_error < next_frame->coded_error) ||
- ((next_frame->pcnt_second_ref > next_frame->pcnt_inter) &&
- (next_frame->pcnt_second_ref >= 0.5)));
+ if (frame_stats == NULL) {
+ return 0;
+ }
+ return (frame_stats->sr_coded_error < frame_stats->coded_error) ||
+ ((frame_stats->pcnt_second_ref > frame_stats->pcnt_inter) &&
+ (frame_stats->pcnt_second_ref >= 0.5));
+}
+
+static int detect_flash(const TWO_PASS *twopass, int offset) {
+ const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset);
+ return detect_flash_from_frame_stats(next_frame);
}
// Update the motion related elements to the GF arf boost calculation.
@@ -1876,13 +1878,15 @@ static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
#define BASELINE_ERR_PER_MB 12500.0
#define GF_MAX_BOOST 96.0
-static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame,
+static double calc_frame_boost(const FRAME_INFO *frame_info,
+ const FIRSTPASS_STATS *this_frame,
+ int avg_frame_qindex,
double this_frame_mv_in_out) {
double frame_boost;
- const double lq = vp9_convert_qindex_to_q(
- cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth);
+ const double lq =
+ vp9_convert_qindex_to_q(avg_frame_qindex, frame_info->bit_depth);
const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5);
- const double active_area = calculate_active_area(cpi, this_frame);
+ const double active_area = calculate_active_area(frame_info, this_frame);
// Underlying boost factor is based on inter error ratio.
frame_boost = (BASELINE_ERR_PER_MB * active_area) /
@@ -1921,7 +1925,8 @@ static double calc_kf_frame_boost(VP9_COMP *cpi,
const double lq = vp9_convert_qindex_to_q(
cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth);
const double boost_q_correction = VPXMIN((0.50 + (lq * 0.015)), 2.00);
- const double active_area = calculate_active_area(cpi, this_frame);
+ const double active_area =
+ calculate_active_area(&cpi->frame_info, this_frame);
// Underlying boost factor is based on inter error ratio.
frame_boost = (kf_err_per_mb(cpi) * active_area) /
@@ -1946,8 +1951,10 @@ static double calc_kf_frame_boost(VP9_COMP *cpi,
return VPXMIN(frame_boost, max_boost * boost_q_correction);
}
-static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
- TWO_PASS *const twopass = &cpi->twopass;
+static int compute_arf_boost(const FRAME_INFO *frame_info,
+ const FIRST_PASS_INFO *first_pass_info,
+ int arf_show_idx, int f_frames, int b_frames,
+ int avg_frame_qindex) {
int i;
double boost_score = 0.0;
double mv_ratio_accumulator = 0.0;
@@ -1960,7 +1967,10 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
// Search forward from the proposed arf/next gf position.
for (i = 0; i < f_frames; ++i) {
- const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i);
+ const FIRSTPASS_STATS *this_frame =
+ fps_get_frame_stats(first_pass_info, arf_show_idx + i);
+ const FIRSTPASS_STATS *next_frame =
+ fps_get_frame_stats(first_pass_info, arf_show_idx + i + 1);
if (this_frame == NULL) break;
// Update the motion related elements to the boost calculation.
@@ -1970,17 +1980,19 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
// We want to discount the flash frame itself and the recovery
// frame that follows as both will have poor scores.
- flash_detected = detect_flash(twopass, i) || detect_flash(twopass, i + 1);
+ flash_detected = detect_flash_from_frame_stats(this_frame) ||
+ detect_flash_from_frame_stats(next_frame);
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(cpi, this_frame);
+ decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR
: decay_accumulator;
}
- boost_score += decay_accumulator *
- calc_frame_boost(cpi, this_frame, this_frame_mv_in_out);
+ boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame,
+ avg_frame_qindex,
+ this_frame_mv_in_out);
}
arf_boost = (int)boost_score;
@@ -1995,7 +2007,10 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
// Search backward towards last gf position.
for (i = -1; i >= -b_frames; --i) {
- const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i);
+ const FIRSTPASS_STATS *this_frame =
+ fps_get_frame_stats(first_pass_info, arf_show_idx + i);
+ const FIRSTPASS_STATS *next_frame =
+ fps_get_frame_stats(first_pass_info, arf_show_idx + i + 1);
if (this_frame == NULL) break;
// Update the motion related elements to the boost calculation.
@@ -2005,17 +2020,19 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
// We want to discount the the flash frame itself and the recovery
// frame that follows as both will have poor scores.
- flash_detected = detect_flash(twopass, i) || detect_flash(twopass, i + 1);
+ flash_detected = detect_flash_from_frame_stats(this_frame) ||
+ detect_flash_from_frame_stats(next_frame);
// Cumulative effect of prediction quality decay.
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(cpi, this_frame);
+ decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR
: decay_accumulator;
}
- boost_score += decay_accumulator *
- calc_frame_boost(cpi, this_frame, this_frame_mv_in_out);
+ boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame,
+ avg_frame_qindex,
+ this_frame_mv_in_out);
}
arf_boost += (int)boost_score;
@@ -2026,6 +2043,15 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
return arf_boost;
}
+static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
+ const FRAME_INFO *frame_info = &cpi->frame_info;
+ TWO_PASS *const twopass = &cpi->twopass;
+ const int avg_inter_frame_qindex = cpi->rc.avg_frame_qindex[INTER_FRAME];
+ int arf_show_idx = get_show_idx(twopass);
+ return compute_arf_boost(frame_info, &twopass->first_pass_info, arf_show_idx,
+ f_frames, b_frames, avg_inter_frame_qindex);
+}
+
// Calculate a section intra ratio used in setting max loop filter.
static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin,
const FIRSTPASS_STATS *end,
@@ -2060,8 +2086,19 @@ static int64_t calculate_total_gf_group_bits(VP9_COMP *cpi,
// Calculate the bits to be allocated to the group as a whole.
if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0.0)) {
+ int key_frame_interval = rc->frames_since_key + rc->frames_to_key;
+ int distance_from_next_key_frame =
+ rc->frames_to_key -
+ (rc->baseline_gf_interval + rc->source_alt_ref_pending);
+ int max_gf_bits_bias = rc->avg_frame_bandwidth;
+ double gf_interval_bias_bits_normalize_factor =
+ (double)rc->baseline_gf_interval / 16;
total_group_bits = (int64_t)(twopass->kf_group_bits *
(gf_group_err / twopass->kf_group_error_left));
+ // TODO(ravi): Experiment with different values of max_gf_bits_bias
+ total_group_bits +=
+ (int64_t)((double)distance_from_next_key_frame / key_frame_interval *
+ max_gf_bits_bias * gf_interval_bias_bits_normalize_factor);
} else {
total_group_bits = 0;
}
@@ -2415,194 +2452,94 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise,
#define ARF_ABS_ZOOM_THRESH 4.0
#define MAX_GF_BOOST 5400
-static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
- VP9_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
- VP9EncoderConfig *const oxcf = &cpi->oxcf;
- TWO_PASS *const twopass = &cpi->twopass;
- FIRSTPASS_STATS next_frame;
- const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
- int i;
- double gf_group_err = 0.0;
- double gf_group_raw_error = 0.0;
- double gf_group_noise = 0.0;
- double gf_group_skip_pct = 0.0;
- double gf_group_inactive_zone_rows = 0.0;
- double gf_group_inter = 0.0;
- double gf_group_motion = 0.0;
- double gf_first_frame_err = 0.0;
- double mod_frame_err = 0.0;
+typedef struct RANGE {
+ int min;
+ int max;
+} RANGE;
- double mv_ratio_accumulator = 0.0;
- double zero_motion_accumulator = 1.0;
+static int get_gop_coding_frame_num(
+ int *use_alt_ref, const FRAME_INFO *frame_info,
+ const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc,
+ int gf_start_show_idx, const RANGE *active_gf_interval,
+ double gop_intra_factor, int lag_in_frames) {
double loop_decay_rate = 1.00;
- double last_loop_decay_rate = 1.00;
-
+ double mv_ratio_accumulator = 0.0;
double this_frame_mv_in_out = 0.0;
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
- double mv_ratio_accumulator_thresh;
- double abs_mv_in_out_thresh;
double sr_accumulator = 0.0;
- const double av_err = get_distribution_av_err(cpi, twopass);
- unsigned int allow_alt_ref = is_altref_enabled(cpi);
-
- int flash_detected;
- int active_max_gf_interval;
- int active_min_gf_interval;
- int64_t gf_group_bits;
- int gf_arf_bits;
- const int is_key_frame = frame_is_intra_only(cm);
- const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
- int is_alt_ref_flash = 0;
-
- double gop_intra_factor = 1.0;
- int gop_frames;
-
- // Reset the GF group data structures unless this is a key
- // frame in which case it will already have been done.
- if (is_key_frame == 0) {
- vp9_zero(twopass->gf_group);
- }
-
- vpx_clear_system_state();
- vp9_zero(next_frame);
-
- // Load stats for the current frame.
- mod_frame_err =
- calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
-
- // Note the error of the frame at the start of the group. This will be
- // the GF frame error if we code a normal gf.
- gf_first_frame_err = mod_frame_err;
-
- // If this is a key frame or the overlay from a previous arf then
- // the error score / cost of this frame has already been accounted for.
- if (arf_active_or_kf) {
- gf_group_err -= gf_first_frame_err;
- gf_group_raw_error -= this_frame->coded_error;
- gf_group_noise -= this_frame->frame_noise_energy;
- gf_group_skip_pct -= this_frame->intra_skip_pct;
- gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
- gf_group_inter -= this_frame->pcnt_inter;
- gf_group_motion -= this_frame->pcnt_motion;
- }
-
// Motion breakout threshold for loop below depends on image size.
- mv_ratio_accumulator_thresh =
- (cpi->initial_height + cpi->initial_width) / 4.0;
- abs_mv_in_out_thresh = ARF_ABS_ZOOM_THRESH;
-
- // Set a maximum and minimum interval for the GF group.
- // If the image appears almost completely static we can extend beyond this.
- {
- int int_max_q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
- cpi->common.bit_depth));
- int q_term = (cm->current_video_frame == 0)
- ? int_max_q / 32
- : (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex,
- cpi->common.bit_depth) /
- 6);
- active_min_gf_interval =
- rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200);
- active_min_gf_interval =
- VPXMIN(active_min_gf_interval, rc->max_gf_interval + arf_active_or_kf);
-
- // The value chosen depends on the active Q range. At low Q we have
- // bits to spare and are better with a smaller interval and smaller boost.
- // At high Q when there are few bits to spare we are better with a longer
- // interval to spread the cost of the GF.
- active_max_gf_interval = 11 + arf_active_or_kf + VPXMIN(5, q_term);
-
- // Force max GF interval to be odd.
- active_max_gf_interval = active_max_gf_interval | 0x01;
-
- // We have: active_min_gf_interval <=
- // rc->max_gf_interval + arf_active_or_kf.
- if (active_max_gf_interval < active_min_gf_interval) {
- active_max_gf_interval = active_min_gf_interval;
- } else {
- active_max_gf_interval = VPXMIN(active_max_gf_interval,
- rc->max_gf_interval + arf_active_or_kf);
+ double mv_ratio_accumulator_thresh =
+ (frame_info->frame_height + frame_info->frame_width) / 4.0;
+ double zero_motion_accumulator = 1.0;
+ int gop_coding_frames;
+
+ *use_alt_ref = 1;
+ gop_coding_frames = 0;
+ while (gop_coding_frames < rc->static_scene_max_gf_interval &&
+ gop_coding_frames < rc->frames_to_key) {
+ const FIRSTPASS_STATS *next_next_frame;
+ const FIRSTPASS_STATS *next_frame;
+ int flash_detected;
+ ++gop_coding_frames;
+
+ next_frame = fps_get_frame_stats(first_pass_info,
+ gf_start_show_idx + gop_coding_frames);
+ if (next_frame == NULL) {
+ break;
}
- // Would the active max drop us out just before the near the next kf?
- if ((active_max_gf_interval <= rc->frames_to_key) &&
- (active_max_gf_interval >= (rc->frames_to_key - rc->min_gf_interval)))
- active_max_gf_interval = rc->frames_to_key / 2;
- }
- active_max_gf_interval =
- VPXMAX(active_max_gf_interval, active_min_gf_interval);
-
- if (cpi->multi_layer_arf) {
- int layers = 0;
- int max_layers = VPXMIN(MAX_ARF_LAYERS, cpi->oxcf.enable_auto_arf);
-
- // Adapt the intra_error factor to active_max_gf_interval limit.
- for (i = active_max_gf_interval; i > 0; i >>= 1) ++layers;
-
- layers = VPXMIN(max_layers, layers);
- gop_intra_factor += (layers * 0.25);
- }
-
- i = 0;
- while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
- ++i;
-
- // Accumulate error score of frames in this gf group.
- mod_frame_err =
- calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
- gf_group_err += mod_frame_err;
- gf_group_raw_error += this_frame->coded_error;
- gf_group_noise += this_frame->frame_noise_energy;
- gf_group_skip_pct += this_frame->intra_skip_pct;
- gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
- gf_group_inter += this_frame->pcnt_inter;
- gf_group_motion += this_frame->pcnt_motion;
-
- if (EOF == input_stats(twopass, &next_frame)) break;
-
// Test for the case where there is a brief flash but the prediction
// quality back to an earlier frame is then restored.
- flash_detected = detect_flash(twopass, 0);
+ next_next_frame = fps_get_frame_stats(
+ first_pass_info, gf_start_show_idx + gop_coding_frames + 1);
+ flash_detected = detect_flash_from_frame_stats(next_next_frame);
// Update the motion related elements to the boost calculation.
accumulate_frame_motion_stats(
- &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
+ next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator);
// Monitor for static sections.
- if ((rc->frames_since_key + i - 1) > 1) {
- zero_motion_accumulator = VPXMIN(
- zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+ if ((rc->frames_since_key + gop_coding_frames - 1) > 1) {
+ zero_motion_accumulator =
+ VPXMIN(zero_motion_accumulator,
+ get_zero_motion_factor(frame_info, next_frame));
}
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
- last_loop_decay_rate = loop_decay_rate;
- loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+ double last_loop_decay_rate = loop_decay_rate;
+ loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame);
// Break clause to detect very still sections after motion. For example,
// a static image after a fade or other transition.
- if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
- last_loop_decay_rate)) {
- allow_alt_ref = 0;
- break;
+ if (gop_coding_frames > rc->min_gf_interval && loop_decay_rate >= 0.999 &&
+ last_loop_decay_rate < 0.9) {
+ int still_interval = 5;
+ if (check_transition_to_still(first_pass_info,
+ gf_start_show_idx + gop_coding_frames,
+ still_interval)) {
+ *use_alt_ref = 0;
+ break;
+ }
}
// Update the accumulator for second ref error difference.
// This is intended to give an indication of how much the coded error is
// increasing over time.
- if (i == 1) {
- sr_accumulator += next_frame.coded_error;
+ if (gop_coding_frames == 1) {
+ sr_accumulator += next_frame->coded_error;
} else {
- sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error);
+ sr_accumulator +=
+ (next_frame->sr_coded_error - next_frame->coded_error);
}
}
// Break out conditions.
- // Break at maximum of active_max_gf_interval unless almost totally static.
+ // Break at maximum of active_gf_interval->max unless almost totally
+ // static.
//
// Note that the addition of a test of rc->source_alt_ref_active is
// deliberate. The effect of this is that after a normal altref group even
@@ -2612,59 +2549,230 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// such as a fade, the arf group spanning the transition may not be coded
// at a very high quality and hence this frame (with its overlay) is a
// poor golden frame to use for an extended group.
- if (((i >= active_max_gf_interval) &&
- ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) ||
- (
- // Don't break out with a very short interval.
- (i >= active_min_gf_interval) &&
- // If possible dont break very close to a kf
- ((rc->frames_to_key - i) >= rc->min_gf_interval) && (i & 0x01) &&
- (!flash_detected) &&
- ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
- (abs_mv_in_out_accumulator > abs_mv_in_out_thresh) ||
- (sr_accumulator > gop_intra_factor * next_frame.intra_error)))) {
+ if ((gop_coding_frames >= active_gf_interval->max) &&
+ ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) {
+ break;
+ }
+ if (
+ // Don't break out with a very short interval.
+ (gop_coding_frames >= active_gf_interval->min) &&
+ // If possible dont break very close to a kf
+ ((rc->frames_to_key - gop_coding_frames) >= rc->min_gf_interval) &&
+ (gop_coding_frames & 0x01) && (!flash_detected) &&
+ ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
+ (abs_mv_in_out_accumulator > ARF_ABS_ZOOM_THRESH) ||
+ (sr_accumulator > gop_intra_factor * next_frame->intra_error))) {
break;
}
+ }
+ *use_alt_ref &= zero_motion_accumulator < 0.995;
+ *use_alt_ref &= gop_coding_frames < lag_in_frames;
+ *use_alt_ref &= gop_coding_frames >= rc->min_gf_interval;
+ return gop_coding_frames;
+}
+
+static RANGE get_active_gf_inverval_range(
+ const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf,
+ int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) {
+ RANGE active_gf_interval;
+#if CONFIG_RATE_CTRL
+ (void)frame_info;
+ (void)gf_start_show_idx;
+ (void)active_worst_quality;
+ (void)last_boosted_qindex;
+ active_gf_interval.min = rc->min_gf_interval + arf_active_or_kf + 2;
- *this_frame = next_frame;
+ active_gf_interval.max = 16 + arf_active_or_kf;
+
+ if ((active_gf_interval.max <= rc->frames_to_key) &&
+ (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) {
+ active_gf_interval.min = rc->frames_to_key / 2;
+ active_gf_interval.max = rc->frames_to_key / 2;
+ }
+#else
+ int int_max_q = (int)(vp9_convert_qindex_to_q(active_worst_quality,
+ frame_info->bit_depth));
+ int q_term = (gf_start_show_idx == 0)
+ ? int_max_q / 32
+ : (int)(vp9_convert_qindex_to_q(last_boosted_qindex,
+ frame_info->bit_depth) /
+ 6);
+ active_gf_interval.min =
+ rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200);
+ active_gf_interval.min =
+ VPXMIN(active_gf_interval.min, rc->max_gf_interval + arf_active_or_kf);
+
+ // The value chosen depends on the active Q range. At low Q we have
+ // bits to spare and are better with a smaller interval and smaller boost.
+ // At high Q when there are few bits to spare we are better with a longer
+ // interval to spread the cost of the GF.
+ active_gf_interval.max = 11 + arf_active_or_kf + VPXMIN(5, q_term);
+
+ // Force max GF interval to be odd.
+ active_gf_interval.max = active_gf_interval.max | 0x01;
+
+ // We have: active_gf_interval.min <=
+ // rc->max_gf_interval + arf_active_or_kf.
+ if (active_gf_interval.max < active_gf_interval.min) {
+ active_gf_interval.max = active_gf_interval.min;
+ } else {
+ active_gf_interval.max =
+ VPXMIN(active_gf_interval.max, rc->max_gf_interval + arf_active_or_kf);
+ }
+
+ // Would the active max drop us out just before the near the next kf?
+ if ((active_gf_interval.max <= rc->frames_to_key) &&
+ (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) {
+ active_gf_interval.max = rc->frames_to_key / 2;
+ }
+ active_gf_interval.max =
+ VPXMAX(active_gf_interval.max, active_gf_interval.min);
+#endif
+ return active_gf_interval;
+}
+
+static int get_arf_layers(int multi_layer_arf, int max_layers,
+ int coding_frame_num) {
+ assert(max_layers <= MAX_ARF_LAYERS);
+ if (multi_layer_arf) {
+ int layers = 0;
+ int i;
+ for (i = coding_frame_num; i > 0; i >>= 1) {
+ ++layers;
+ }
+ layers = VPXMIN(max_layers, layers);
+ return layers;
+ } else {
+ return 1;
+ }
+}
+
+static void define_gf_group(VP9_COMP *cpi, int gf_start_show_idx) {
+ VP9_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ TWO_PASS *const twopass = &cpi->twopass;
+ const FRAME_INFO *frame_info = &cpi->frame_info;
+ const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
+ const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
+ int gop_coding_frames;
+
+ double gf_group_err = 0.0;
+ double gf_group_raw_error = 0.0;
+ double gf_group_noise = 0.0;
+ double gf_group_skip_pct = 0.0;
+ double gf_group_inactive_zone_rows = 0.0;
+ double gf_group_inter = 0.0;
+ double gf_group_motion = 0.0;
+
+ int allow_alt_ref = is_altref_enabled(cpi);
+ int use_alt_ref;
+
+ int64_t gf_group_bits;
+ int gf_arf_bits;
+ const int is_key_frame = frame_is_intra_only(cm);
+ // If this is a key frame or the overlay from a previous arf then
+ // the error score / cost of this frame has already been accounted for.
+ const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
+ int is_alt_ref_flash = 0;
+
+ double gop_intra_factor;
+ int gop_frames;
+ RANGE active_gf_interval;
+
+ // Reset the GF group data structures unless this is a key
+ // frame in which case it will already have been done.
+ if (is_key_frame == 0) {
+ vp9_zero(twopass->gf_group);
+ }
+
+ vpx_clear_system_state();
+
+ active_gf_interval = get_active_gf_inverval_range(
+ frame_info, rc, arf_active_or_kf, gf_start_show_idx,
+ twopass->active_worst_quality, rc->last_boosted_qindex);
+
+ if (cpi->multi_layer_arf) {
+ int arf_layers = get_arf_layers(cpi->multi_layer_arf, oxcf->enable_auto_arf,
+ active_gf_interval.max);
+ gop_intra_factor = 1.0 + 0.25 * arf_layers;
+ } else {
+ gop_intra_factor = 1.0;
+ }
+
+ {
+ gop_coding_frames = get_gop_coding_frame_num(
+ &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx,
+ &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames);
+ use_alt_ref &= allow_alt_ref;
}
// Was the group length constrained by the requirement for a new KF?
- rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
+ rc->constrained_gf_group = (gop_coding_frames >= rc->frames_to_key) ? 1 : 0;
// Should we use the alternate reference frame.
- if ((zero_motion_accumulator < 0.995) && allow_alt_ref &&
- (twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) &&
- (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) {
- const int forward_frames = (rc->frames_to_key - i >= i - 1)
- ? i - 1
- : VPXMAX(0, rc->frames_to_key - i);
+ if (use_alt_ref) {
+ const int f_frames =
+ (rc->frames_to_key - gop_coding_frames >= gop_coding_frames - 1)
+ ? gop_coding_frames - 1
+ : VPXMAX(0, rc->frames_to_key - gop_coding_frames);
+ const int b_frames = gop_coding_frames - 1;
+ const int avg_inter_frame_qindex = rc->avg_frame_qindex[INTER_FRAME];
+ // TODO(angiebird): figure out why arf's location is assigned this way
+ const int arf_show_idx = VPXMIN(gf_start_show_idx + gop_coding_frames + 1,
+ fps_get_num_frames(first_pass_info));
// Calculate the boost for alt ref.
- rc->gfu_boost = calc_arf_boost(cpi, forward_frames, (i - 1));
+ rc->gfu_boost =
+ compute_arf_boost(frame_info, first_pass_info, arf_show_idx, f_frames,
+ b_frames, avg_inter_frame_qindex);
rc->source_alt_ref_pending = 1;
} else {
- reset_fpf_position(twopass, start_pos);
- rc->gfu_boost = VPXMIN(MAX_GF_BOOST, calc_arf_boost(cpi, (i - 1), 0));
+ const int f_frames = gop_coding_frames - 1;
+ const int b_frames = 0;
+ const int avg_inter_frame_qindex = rc->avg_frame_qindex[INTER_FRAME];
+ // TODO(angiebird): figure out why arf's location is assigned this way
+ const int gld_show_idx =
+ VPXMIN(gf_start_show_idx + 1, fps_get_num_frames(first_pass_info));
+ const int arf_boost =
+ compute_arf_boost(frame_info, first_pass_info, gld_show_idx, f_frames,
+ b_frames, avg_inter_frame_qindex);
+ rc->gfu_boost = VPXMIN(MAX_GF_BOOST, arf_boost);
rc->source_alt_ref_pending = 0;
}
#define LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR 0.2
rc->arf_active_best_quality_adjustment_factor = 1.0;
- if (rc->source_alt_ref_pending && !is_lossless_requested(&cpi->oxcf) &&
- rc->frames_to_key <= rc->arf_active_best_quality_adjustment_window) {
- rc->arf_active_best_quality_adjustment_factor =
- LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR +
- (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) *
- (rc->frames_to_key - i) /
- VPXMAX(1, (rc->arf_active_best_quality_adjustment_window - i));
+ rc->arf_increase_active_best_quality = 0;
+
+ if (!is_lossless_requested(&cpi->oxcf)) {
+ if (rc->frames_since_key >= rc->frames_to_key) {
+ // Increase the active best quality in the second half of key frame
+ // interval.
+ rc->arf_active_best_quality_adjustment_factor =
+ LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR +
+ (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) *
+ (rc->frames_to_key - gop_coding_frames) /
+ (VPXMAX(1, ((rc->frames_to_key + rc->frames_since_key) / 2 -
+ gop_coding_frames)));
+ rc->arf_increase_active_best_quality = 1;
+ } else if ((rc->frames_to_key - gop_coding_frames) > 0) {
+ // Reduce the active best quality in the first half of key frame interval.
+ rc->arf_active_best_quality_adjustment_factor =
+ LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR +
+ (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) *
+ (rc->frames_since_key + gop_coding_frames) /
+ (VPXMAX(1, (rc->frames_to_key + rc->frames_since_key) / 2 +
+ gop_coding_frames));
+ rc->arf_increase_active_best_quality = -1;
+ }
}
#ifdef AGGRESSIVE_VBR
// Limit maximum boost based on interval length.
- rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 140);
+ rc->gfu_boost = VPXMIN((int)rc->gfu_boost, gop_coding_frames * 140);
#else
- rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 200);
+ rc->gfu_boost = VPXMIN((int)rc->gfu_boost, gop_coding_frames * 200);
#endif
// Cap the ARF boost when perceptual quality AQ mode is enabled. This is
@@ -2674,14 +2782,34 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
if (oxcf->aq_mode == PERCEPTUAL_AQ)
rc->gfu_boost = VPXMIN(rc->gfu_boost, MIN_ARF_GF_BOOST);
- rc->baseline_gf_interval = i - rc->source_alt_ref_pending;
-
- // Reset the file position.
- reset_fpf_position(twopass, start_pos);
+ rc->baseline_gf_interval = gop_coding_frames - rc->source_alt_ref_pending;
if (rc->source_alt_ref_pending)
is_alt_ref_flash = detect_flash(twopass, rc->baseline_gf_interval);
+ {
+ const double av_err = get_distribution_av_err(cpi, twopass);
+ const double mean_mod_score = twopass->mean_mod_score;
+ // If the first frame is a key frame or the overlay from a previous arf then
+ // the error score / cost of this frame has already been accounted for.
+ int start_idx = arf_active_or_kf ? 1 : 0;
+ int j;
+ for (j = start_idx; j < gop_coding_frames; ++j) {
+ int show_idx = gf_start_show_idx + j;
+ const FIRSTPASS_STATS *frame_stats =
+ fps_get_frame_stats(first_pass_info, show_idx);
+ // Accumulate error score of frames in this gf group.
+ gf_group_err += calc_norm_frame_score(oxcf, frame_info, frame_stats,
+ mean_mod_score, av_err);
+ gf_group_raw_error += frame_stats->coded_error;
+ gf_group_noise += frame_stats->frame_noise_energy;
+ gf_group_skip_pct += frame_stats->intra_skip_pct;
+ gf_group_inactive_zone_rows += frame_stats->inactive_zone_rows;
+ gf_group_inter += frame_stats->pcnt_inter;
+ gf_group_motion += frame_stats->pcnt_motion;
+ }
+ }
+
// Calculate the bits to be allocated to the gf/arf group as a whole
gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
@@ -2847,17 +2975,23 @@ static int intra_step_transition(const FIRSTPASS_STATS *this_frame,
// Test for very low intra complexity which could cause false key frames
#define V_LOW_INTRA 0.5
-static int test_candidate_kf(TWO_PASS *twopass,
- const FIRSTPASS_STATS *last_frame,
- const FIRSTPASS_STATS *this_frame,
- const FIRSTPASS_STATS *next_frame) {
+static int test_candidate_kf(const FIRST_PASS_INFO *first_pass_info,
+ int show_idx) {
+ const FIRSTPASS_STATS *last_frame =
+ fps_get_frame_stats(first_pass_info, show_idx - 1);
+ const FIRSTPASS_STATS *this_frame =
+ fps_get_frame_stats(first_pass_info, show_idx);
+ const FIRSTPASS_STATS *next_frame =
+ fps_get_frame_stats(first_pass_info, show_idx + 1);
int is_viable_kf = 0;
double pcnt_intra = 1.0 - this_frame->pcnt_inter;
// Does the frame satisfy the primary criteria of a key frame?
// See above for an explanation of the test criteria.
// If so, then examine how well it predicts subsequent frames.
- if (!detect_flash(twopass, -1) && !detect_flash(twopass, 0) &&
+ detect_flash_from_frame_stats(next_frame);
+ if (!detect_flash_from_frame_stats(this_frame) &&
+ !detect_flash_from_frame_stats(next_frame) &&
(this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
(slide_transition(this_frame, last_frame, next_frame)) ||
@@ -2870,42 +3004,41 @@ static int test_candidate_kf(TWO_PASS *twopass,
DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) <
KF_II_ERR_THRESHOLD)))) {
int i;
- const FIRSTPASS_STATS *start_pos = twopass->stats_in;
- FIRSTPASS_STATS local_next_frame = *next_frame;
double boost_score = 0.0;
double old_boost_score = 0.0;
double decay_accumulator = 1.0;
// Examine how well the key frame predicts subsequent frames.
for (i = 0; i < 16; ++i) {
- double next_iiratio = (II_FACTOR * local_next_frame.intra_error /
- DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error));
+ const FIRSTPASS_STATS *frame_stats =
+ fps_get_frame_stats(first_pass_info, show_idx + 1 + i);
+ double next_iiratio = (II_FACTOR * frame_stats->intra_error /
+ DOUBLE_DIVIDE_CHECK(frame_stats->coded_error));
if (next_iiratio > KF_II_MAX) next_iiratio = KF_II_MAX;
// Cumulative effect of decay in prediction quality.
- if (local_next_frame.pcnt_inter > 0.85)
- decay_accumulator *= local_next_frame.pcnt_inter;
+ if (frame_stats->pcnt_inter > 0.85)
+ decay_accumulator *= frame_stats->pcnt_inter;
else
- decay_accumulator *= (0.85 + local_next_frame.pcnt_inter) / 2.0;
+ decay_accumulator *= (0.85 + frame_stats->pcnt_inter) / 2.0;
// Keep a running total.
boost_score += (decay_accumulator * next_iiratio);
// Test various breakout clauses.
- if ((local_next_frame.pcnt_inter < 0.05) || (next_iiratio < 1.5) ||
- (((local_next_frame.pcnt_inter - local_next_frame.pcnt_neutral) <
- 0.20) &&
+ if ((frame_stats->pcnt_inter < 0.05) || (next_iiratio < 1.5) ||
+ (((frame_stats->pcnt_inter - frame_stats->pcnt_neutral) < 0.20) &&
(next_iiratio < 3.0)) ||
((boost_score - old_boost_score) < 3.0) ||
- (local_next_frame.intra_error < V_LOW_INTRA)) {
+ (frame_stats->intra_error < V_LOW_INTRA)) {
break;
}
old_boost_score = boost_score;
// Get the next frame details
- if (EOF == input_stats(twopass, &local_next_frame)) break;
+ if (show_idx + 1 + i == fps_get_num_frames(first_pass_info) - 1) break;
}
// If there is tolerable prediction for at least the next 3 frames then
@@ -2913,9 +3046,6 @@ static int test_candidate_kf(TWO_PASS *twopass,
if (boost_score > 30.0 && (i > 3)) {
is_viable_kf = 1;
} else {
- // Reset the file position
- reset_fpf_position(twopass, start_pos);
-
is_viable_kf = 0;
}
}
@@ -2938,19 +3068,80 @@ static int test_candidate_kf(TWO_PASS *twopass,
#define MAX_KF_TOT_BOOST 5400
#endif
-static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
- int i, j;
+int vp9_get_frames_to_next_key(const VP9EncoderConfig *oxcf,
+ const FRAME_INFO *frame_info,
+ const FIRST_PASS_INFO *first_pass_info,
+ int kf_show_idx, int min_gf_interval) {
+ double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
+ int j;
+ int frames_to_key;
+ int max_frames_to_key = first_pass_info->num_frames - kf_show_idx;
+ max_frames_to_key = VPXMIN(max_frames_to_key, oxcf->key_freq);
+
+ // Initialize the decay rates for the recent frames to check
+ for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0;
+ // Find the next keyframe.
+ if (!oxcf->auto_key) {
+ frames_to_key = max_frames_to_key;
+ } else {
+ frames_to_key = 1;
+ while (frames_to_key < max_frames_to_key) {
+ // Provided that we are not at the end of the file...
+ if (kf_show_idx + frames_to_key + 1 < first_pass_info->num_frames) {
+ double loop_decay_rate;
+ double decay_accumulator;
+ const FIRSTPASS_STATS *next_frame = fps_get_frame_stats(
+ first_pass_info, kf_show_idx + frames_to_key + 1);
+
+ // Check for a scene cut.
+ if (test_candidate_kf(first_pass_info, kf_show_idx + frames_to_key))
+ break;
+
+ // How fast is the prediction quality decaying?
+ loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame);
+
+ // We want to know something about the recent past... rather than
+ // as used elsewhere where we are concerned with decay in prediction
+ // quality since the last GF or KF.
+ recent_loop_decay[(frames_to_key - 1) % FRAMES_TO_CHECK_DECAY] =
+ loop_decay_rate;
+ decay_accumulator = 1.0;
+ for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j)
+ decay_accumulator *= recent_loop_decay[j];
+
+ // Special check for transition or high motion followed by a
+ // static scene.
+ if ((frames_to_key - 1) > min_gf_interval && loop_decay_rate >= 0.999 &&
+ decay_accumulator < 0.9) {
+ int still_interval = oxcf->key_freq - (frames_to_key - 1);
+ // TODO(angiebird): Figure out why we use "+1" here
+ int show_idx = kf_show_idx + frames_to_key;
+ if (check_transition_to_still(first_pass_info, show_idx,
+ still_interval)) {
+ break;
+ }
+ }
+ }
+ ++frames_to_key;
+ }
+ }
+ return frames_to_key;
+}
+
+static void find_next_key_frame(VP9_COMP *cpi, int kf_show_idx) {
+ int i;
RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- const FIRSTPASS_STATS first_frame = *this_frame;
+ const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
+ const FRAME_INFO *frame_info = &cpi->frame_info;
const FIRSTPASS_STATS *const start_position = twopass->stats_in;
+ const FIRSTPASS_STATS *keyframe_stats =
+ fps_get_frame_stats(first_pass_info, kf_show_idx);
FIRSTPASS_STATS next_frame;
- FIRSTPASS_STATS last_frame;
int kf_bits = 0;
int64_t max_kf_bits;
- double decay_accumulator = 1.0;
double zero_motion_accumulator = 1.0;
double zero_motion_sum = 0.0;
double zero_motion_avg;
@@ -2962,10 +3153,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double kf_mod_err = 0.0;
double kf_raw_err = 0.0;
double kf_group_err = 0.0;
- double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
double sr_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
const double av_err = get_distribution_av_err(cpi, twopass);
+ const double mean_mod_score = twopass->mean_mod_score;
vp9_zero(next_frame);
cpi->common.frame_type = KEY_FRAME;
@@ -2989,96 +3180,29 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->kf_group_bits = 0; // Total bits available to kf group
twopass->kf_group_error_left = 0.0; // Group modified error score.
- kf_raw_err = this_frame->intra_error;
- kf_mod_err =
- calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
-
- // Initialize the decay rates for the recent frames to check
- for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0;
-
- // Find the next keyframe.
- i = 0;
- while (twopass->stats_in < twopass->stats_in_end &&
- rc->frames_to_key < cpi->oxcf.key_freq) {
- // Accumulate kf group error.
- kf_group_err +=
- calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
-
- // Load the next frame's stats.
- last_frame = *this_frame;
- input_stats(twopass, this_frame);
+ kf_raw_err = keyframe_stats->intra_error;
+ kf_mod_err = calc_norm_frame_score(oxcf, frame_info, keyframe_stats,
+ mean_mod_score, av_err);
- // Provided that we are not at the end of the file...
- if (cpi->oxcf.auto_key && twopass->stats_in < twopass->stats_in_end) {
- double loop_decay_rate;
-
- // Check for a scene cut.
- if (test_candidate_kf(twopass, &last_frame, this_frame,
- twopass->stats_in))
- break;
-
- // How fast is the prediction quality decaying?
- loop_decay_rate = get_prediction_decay_rate(cpi, twopass->stats_in);
-
- // We want to know something about the recent past... rather than
- // as used elsewhere where we are concerned with decay in prediction
- // quality since the last GF or KF.
- recent_loop_decay[i % FRAMES_TO_CHECK_DECAY] = loop_decay_rate;
- decay_accumulator = 1.0;
- for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j)
- decay_accumulator *= recent_loop_decay[j];
-
- // Special check for transition or high motion followed by a
- // static scene.
- if (detect_transition_to_still(cpi, i, cpi->oxcf.key_freq - i,
- loop_decay_rate, decay_accumulator))
- break;
-
- // Step on to the next frame.
- ++rc->frames_to_key;
-
- // If we don't have a real key frame within the next two
- // key_freq intervals then break out of the loop.
- if (rc->frames_to_key >= 2 * cpi->oxcf.key_freq) break;
- } else {
- ++rc->frames_to_key;
- }
- ++i;
- }
+ rc->frames_to_key = vp9_get_frames_to_next_key(
+ oxcf, frame_info, first_pass_info, kf_show_idx, rc->min_gf_interval);
// If there is a max kf interval set by the user we must obey it.
// We already breakout of the loop above at 2x max.
// This code centers the extra kf if the actual natural interval
// is between 1x and 2x.
- if (cpi->oxcf.auto_key && rc->frames_to_key > cpi->oxcf.key_freq) {
- FIRSTPASS_STATS tmp_frame = first_frame;
-
- rc->frames_to_key /= 2;
-
- // Reset to the start of the group.
- reset_fpf_position(twopass, start_position);
-
- kf_group_err = 0.0;
-
- // Rescan to get the correct error data for the forced kf group.
- for (i = 0; i < rc->frames_to_key; ++i) {
- kf_group_err +=
- calculate_norm_frame_score(cpi, twopass, oxcf, &tmp_frame, av_err);
- input_stats(twopass, &tmp_frame);
- }
- rc->next_key_frame_forced = 1;
- } else if (twopass->stats_in == twopass->stats_in_end ||
- rc->frames_to_key >= cpi->oxcf.key_freq) {
+ if (rc->frames_to_key >= cpi->oxcf.key_freq) {
rc->next_key_frame_forced = 1;
} else {
rc->next_key_frame_forced = 0;
}
- // Special case for the last key frame of the file.
- if (twopass->stats_in >= twopass->stats_in_end) {
+ for (i = 0; i < rc->frames_to_key; ++i) {
+ const FIRSTPASS_STATS *frame_stats =
+ fps_get_frame_stats(first_pass_info, kf_show_idx + i);
// Accumulate kf group error.
- kf_group_err +=
- calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
+ kf_group_err += calc_norm_frame_score(oxcf, frame_info, frame_stats,
+ mean_mod_score, av_err);
}
// Calculate the number of bits that should be assigned to the kf group.
@@ -3103,9 +3227,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
twopass->kf_group_bits = VPXMAX(0, twopass->kf_group_bits);
- // Reset the first pass file position.
- reset_fpf_position(twopass, start_position);
-
// Scan through the kf group collating various stats used to determine
// how many bits to spend on it.
boost_score = 0.0;
@@ -3144,8 +3265,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Monitor for static sections.
// First frame in kf group the second ref indicator is invalid.
if (i > 0) {
- zero_motion_accumulator = VPXMIN(
- zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+ zero_motion_accumulator =
+ VPXMIN(zero_motion_accumulator,
+ get_zero_motion_factor(&cpi->frame_info, &next_frame));
} else {
zero_motion_accumulator =
next_frame.pcnt_inter - next_frame.pcnt_motion;
@@ -3214,6 +3336,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
gf_group->bit_allocation[0] = kf_bits;
gf_group->update_type[0] = KF_UPDATE;
gf_group->rf_level[0] = KF_STD;
+ gf_group->layer_depth[0] = 0;
// Note the total error score of the kf group minus the key frame itself.
twopass->kf_group_error_left = (kf_group_err - kf_mod_err);
@@ -3227,11 +3350,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Default to normal-sized frame on keyframes.
cpi->rc.next_frame_size_selector = UNSCALED;
}
-#define ARF_ACTIVE_BEST_QUALITY_ADJUSTMENT_WINDOW_SIZE 64
- // TODO(ravi.chaudhary@ittiam.com): Experiment without the below min
- // condition. This might be helpful for small key frame intervals.
- rc->arf_active_best_quality_adjustment_window =
- VPXMIN(ARF_ACTIVE_BEST_QUALITY_ADJUSTMENT_WINDOW_SIZE, rc->frames_to_key);
}
static int is_skippable_frame(const VP9_COMP *cpi) {
@@ -3259,6 +3377,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
FIRSTPASS_STATS this_frame;
+ const int show_idx = cm->current_video_frame;
if (!twopass->stats_in) return;
@@ -3341,18 +3460,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
// Keyframe and section processing.
if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
- FIRSTPASS_STATS this_frame_copy;
- this_frame_copy = this_frame;
// Define next KF group and assign bits to it.
- find_next_key_frame(cpi, &this_frame);
- this_frame = this_frame_copy;
+ find_next_key_frame(cpi, show_idx);
} else {
cm->frame_type = INTER_FRAME;
}
// Define a new GF/ARF group. (Should always enter here for key frames).
if (rc->frames_till_gf_update_due == 0) {
- define_gf_group(cpi, &this_frame);
+ define_gf_group(cpi, show_idx);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
@@ -3509,3 +3625,70 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
}
}
}
+
+#if CONFIG_RATE_CTRL
+// Under CONFIG_RATE_CTRL, once the first_pass_info is ready, the number of
+// coding frames (including show frame and alt ref) can be determined.
+int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf,
+ const FRAME_INFO *frame_info,
+ const FIRST_PASS_INFO *first_pass_info,
+ int multi_layer_arf, int allow_alt_ref) {
+ int coding_frame_num = 0;
+ RATE_CONTROL rc;
+ RANGE active_gf_interval;
+ int arf_layers;
+ double gop_intra_factor;
+ int use_alt_ref;
+ int gop_coding_frames;
+ int gop_show_frames;
+ int show_idx = 0;
+ int arf_active_or_kf = 1;
+ rc.static_scene_max_gf_interval = 250;
+ vp9_rc_init(oxcf, 1, &rc);
+
+ while (show_idx < first_pass_info->num_frames) {
+ if (rc.frames_to_key == 0) {
+ rc.frames_to_key = vp9_get_frames_to_next_key(
+ oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval);
+ arf_active_or_kf = 1;
+ } else {
+ }
+
+ {
+ int dummy = 0;
+ active_gf_interval = get_active_gf_inverval_range(
+ frame_info, &rc, arf_active_or_kf, show_idx, dummy, dummy);
+ }
+
+ arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf,
+ active_gf_interval.max);
+ if (multi_layer_arf) {
+ gop_intra_factor = 1.0 + 0.25 * arf_layers;
+ } else {
+ gop_intra_factor = 1.0;
+ }
+
+ gop_coding_frames = get_gop_coding_frame_num(
+ &use_alt_ref, frame_info, first_pass_info, &rc, show_idx,
+ &active_gf_interval, gop_intra_factor, oxcf->lag_in_frames);
+
+ use_alt_ref &= allow_alt_ref;
+
+ rc.source_alt_ref_active = use_alt_ref;
+ arf_active_or_kf = use_alt_ref;
+ gop_show_frames = gop_coding_frames - use_alt_ref;
+ rc.frames_to_key -= gop_show_frames;
+ rc.frames_since_key += gop_show_frames;
+ show_idx += gop_show_frames;
+ coding_frame_num += gop_show_frames + use_alt_ref;
+ }
+ return coding_frame_num;
+}
+#endif
+
+FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass) {
+ return twopass->this_frame_stats;
+}
+FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *twopass) {
+ return twopass->total_stats;
+}
diff --git a/libvpx/vp9/encoder/vp9_firstpass.h b/libvpx/vp9/encoder/vp9_firstpass.h
index a0a96e6ef..cfbc143c3 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.h
+++ b/libvpx/vp9/encoder/vp9_firstpass.h
@@ -13,6 +13,7 @@
#include <assert.h>
+#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/encoder/vp9_lookahead.h"
#include "vp9/encoder/vp9_ratectrl.h"
@@ -147,6 +148,30 @@ typedef struct {
} GF_GROUP;
typedef struct {
+ const FIRSTPASS_STATS *stats;
+ int num_frames;
+} FIRST_PASS_INFO;
+
+static INLINE void fps_init_first_pass_info(FIRST_PASS_INFO *first_pass_info,
+ const FIRSTPASS_STATS *stats,
+ int num_frames) {
+ first_pass_info->stats = stats;
+ first_pass_info->num_frames = num_frames;
+}
+
+static INLINE int fps_get_num_frames(const FIRST_PASS_INFO *first_pass_info) {
+ return first_pass_info->num_frames;
+}
+
+static INLINE const FIRSTPASS_STATS *fps_get_frame_stats(
+ const FIRST_PASS_INFO *first_pass_info, int show_idx) {
+ if (show_idx < 0 || show_idx >= first_pass_info->num_frames) {
+ return NULL;
+ }
+ return &first_pass_info->stats[show_idx];
+}
+
+typedef struct {
unsigned int section_intra_rating;
unsigned int key_frame_section_intra_rating;
FIRSTPASS_STATS total_stats;
@@ -154,6 +179,7 @@ typedef struct {
const FIRSTPASS_STATS *stats_in;
const FIRSTPASS_STATS *stats_in_start;
const FIRSTPASS_STATS *stats_in_end;
+ FIRST_PASS_INFO first_pass_info;
FIRSTPASS_STATS total_left_stats;
int first_pass_done;
int64_t bits_left;
@@ -192,6 +218,7 @@ typedef struct {
int extend_maxq;
int extend_minq_fast;
int arnr_strength_adjustment;
+ int last_qindex_of_arf_layer[MAX_ARF_LAYERS];
GF_GROUP gf_group;
} TWO_PASS;
@@ -219,6 +246,21 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width,
int *scaled_frame_height);
+struct VP9EncoderConfig;
+int vp9_get_frames_to_next_key(const struct VP9EncoderConfig *oxcf,
+ const FRAME_INFO *frame_info,
+ const FIRST_PASS_INFO *first_pass_info,
+ int kf_show_idx, int min_gf_interval);
+#if CONFIG_RATE_CTRL
+int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf,
+ const FRAME_INFO *frame_info,
+ const FIRST_PASS_INFO *first_pass_info,
+ int multi_layer_arf, int allow_alt_ref);
+#endif
+
+FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *two_pass);
+FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *two_pass);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/libvpx/vp9/encoder/vp9_lookahead.c b/libvpx/vp9/encoder/vp9_lookahead.c
index 392cd5d41..97838c38e 100644
--- a/libvpx/vp9/encoder/vp9_lookahead.c
+++ b/libvpx/vp9/encoder/vp9_lookahead.c
@@ -64,6 +64,7 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
unsigned int i;
ctx->max_sz = depth;
ctx->buf = calloc(depth, sizeof(*ctx->buf));
+ ctx->next_show_idx = 0;
if (!ctx->buf) goto bail;
for (i = 0; i < depth; i++)
if (vpx_alloc_frame_buffer(
@@ -81,12 +82,16 @@ bail:
}
#define USE_PARTIAL_COPY 0
+int vp9_lookahead_full(const struct lookahead_ctx *ctx) {
+ return ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz;
+}
+
+int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx) {
+ return ctx->next_show_idx;
+}
int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
- int64_t ts_start, int64_t ts_end,
-#if CONFIG_VP9_HIGHBITDEPTH
- int use_highbitdepth,
-#endif
+ int64_t ts_start, int64_t ts_end, int use_highbitdepth,
vpx_enc_frame_flags_t flags) {
struct lookahead_entry *buf;
#if USE_PARTIAL_COPY
@@ -101,8 +106,12 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
int subsampling_x = src->subsampling_x;
int subsampling_y = src->subsampling_y;
int larger_dimensions, new_dimensions;
+#if !CONFIG_VP9_HIGHBITDEPTH
+ (void)use_highbitdepth;
+ assert(use_highbitdepth == 0);
+#endif
- if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1;
+ if (vp9_lookahead_full(ctx)) return 1;
ctx->sz++;
buf = pop(ctx, &ctx->write_idx);
@@ -184,6 +193,8 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
buf->ts_start = ts_start;
buf->ts_end = ts_end;
buf->flags = flags;
+ buf->show_idx = ctx->next_show_idx;
+ ++ctx->next_show_idx;
return 0;
}
diff --git a/libvpx/vp9/encoder/vp9_lookahead.h b/libvpx/vp9/encoder/vp9_lookahead.h
index c627bede2..dbbe3af58 100644
--- a/libvpx/vp9/encoder/vp9_lookahead.h
+++ b/libvpx/vp9/encoder/vp9_lookahead.h
@@ -25,6 +25,7 @@ struct lookahead_entry {
YV12_BUFFER_CONFIG img;
int64_t ts_start;
int64_t ts_end;
+ int show_idx; /*The show_idx of this frame*/
vpx_enc_frame_flags_t flags;
};
@@ -32,10 +33,12 @@ struct lookahead_entry {
#define MAX_PRE_FRAMES 1
struct lookahead_ctx {
- int max_sz; /* Absolute size of the queue */
- int sz; /* Number of buffers currently in the queue */
- int read_idx; /* Read index */
- int write_idx; /* Write index */
+ int max_sz; /* Absolute size of the queue */
+ int sz; /* Number of buffers currently in the queue */
+ int read_idx; /* Read index */
+ int write_idx; /* Write index */
+ int next_show_idx; /* The show_idx that will be assigned to the next frame
+ being pushed in the queue*/
struct lookahead_entry *buf; /* Buffer list */
};
@@ -57,6 +60,23 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
*/
void vp9_lookahead_destroy(struct lookahead_ctx *ctx);
+/**\brief Check if lookahead is full
+ *
+ * \param[in] ctx Pointer to the lookahead context
+ *
+ * Return 1 if lookahead is full, otherwise return 0.
+ */
+int vp9_lookahead_full(const struct lookahead_ctx *ctx);
+
+/**\brief Return the next_show_idx
+ *
+ * \param[in] ctx Pointer to the lookahead context
+ *
+ * Return the show_idx that will be assigned to the next
+ * frame pushed by vp9_lookahead_push()
+ */
+int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx);
+
/**\brief Enqueue a source buffer
*
* This function will copy the source image into a new framebuffer with
@@ -73,10 +93,7 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx);
* \param[in] active_map Map that specifies which macroblock is active
*/
int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
- int64_t ts_start, int64_t ts_end,
-#if CONFIG_VP9_HIGHBITDEPTH
- int use_highbitdepth,
-#endif
+ int64_t ts_start, int64_t ts_end, int use_highbitdepth,
vpx_enc_frame_flags_t flags);
/**\brief Get the next source buffer to encode
diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c
index d1688f993..ac29f36ec 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/libvpx/vp9/encoder/vp9_mcomp.c
@@ -1731,239 +1731,99 @@ static int exhaustive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
#define MAX_RANGE 256
#define MIN_INTERVAL 1
#if CONFIG_NON_GREEDY_MV
-
-#define LOG2_TABLE_SIZE 1024
-static const int log2_table[LOG2_TABLE_SIZE] = {
- 0, // This is a dummy value
- 0, 1048576, 1661954, 2097152, 2434718, 2710530, 2943725,
- 3145728, 3323907, 3483294, 3627477, 3759106, 3880192, 3992301,
- 4096672, 4194304, 4286015, 4372483, 4454275, 4531870, 4605679,
- 4676053, 4743299, 4807682, 4869436, 4928768, 4985861, 5040877,
- 5093962, 5145248, 5194851, 5242880, 5289431, 5334591, 5378443,
- 5421059, 5462508, 5502851, 5542146, 5580446, 5617800, 5654255,
- 5689851, 5724629, 5758625, 5791875, 5824409, 5856258, 5887450,
- 5918012, 5947969, 5977344, 6006160, 6034437, 6062195, 6089453,
- 6116228, 6142538, 6168398, 6193824, 6218829, 6243427, 6267632,
- 6291456, 6314910, 6338007, 6360756, 6383167, 6405252, 6427019,
- 6448477, 6469635, 6490501, 6511084, 6531390, 6551427, 6571202,
- 6590722, 6609993, 6629022, 6647815, 6666376, 6684713, 6702831,
- 6720734, 6738427, 6755916, 6773205, 6790299, 6807201, 6823917,
- 6840451, 6856805, 6872985, 6888993, 6904834, 6920510, 6936026,
- 6951384, 6966588, 6981641, 6996545, 7011304, 7025920, 7040397,
- 7054736, 7068940, 7083013, 7096956, 7110771, 7124461, 7138029,
- 7151476, 7164804, 7178017, 7191114, 7204100, 7216974, 7229740,
- 7242400, 7254954, 7267405, 7279754, 7292003, 7304154, 7316208,
- 7328167, 7340032, 7351805, 7363486, 7375079, 7386583, 7398000,
- 7409332, 7420579, 7431743, 7442826, 7453828, 7464751, 7475595,
- 7486362, 7497053, 7507669, 7518211, 7528680, 7539077, 7549404,
- 7559660, 7569847, 7579966, 7590017, 7600003, 7609923, 7619778,
- 7629569, 7639298, 7648964, 7658569, 7668114, 7677598, 7687023,
- 7696391, 7705700, 7714952, 7724149, 7733289, 7742375, 7751407,
- 7760385, 7769310, 7778182, 7787003, 7795773, 7804492, 7813161,
- 7821781, 7830352, 7838875, 7847350, 7855777, 7864158, 7872493,
- 7880782, 7889027, 7897226, 7905381, 7913492, 7921561, 7929586,
- 7937569, 7945510, 7953410, 7961268, 7969086, 7976864, 7984602,
- 7992301, 7999960, 8007581, 8015164, 8022709, 8030217, 8037687,
- 8045121, 8052519, 8059880, 8067206, 8074496, 8081752, 8088973,
- 8096159, 8103312, 8110431, 8117516, 8124569, 8131589, 8138576,
- 8145532, 8152455, 8159347, 8166208, 8173037, 8179836, 8186605,
- 8193343, 8200052, 8206731, 8213380, 8220001, 8226593, 8233156,
- 8239690, 8246197, 8252676, 8259127, 8265550, 8271947, 8278316,
- 8284659, 8290976, 8297266, 8303530, 8309768, 8315981, 8322168,
- 8328330, 8334467, 8340579, 8346667, 8352730, 8358769, 8364784,
- 8370775, 8376743, 8382687, 8388608, 8394506, 8400381, 8406233,
- 8412062, 8417870, 8423655, 8429418, 8435159, 8440878, 8446576,
- 8452252, 8457908, 8463542, 8469155, 8474748, 8480319, 8485871,
- 8491402, 8496913, 8502404, 8507875, 8513327, 8518759, 8524171,
- 8529564, 8534938, 8540293, 8545629, 8550947, 8556245, 8561525,
- 8566787, 8572031, 8577256, 8582464, 8587653, 8592825, 8597980,
- 8603116, 8608236, 8613338, 8618423, 8623491, 8628542, 8633576,
- 8638593, 8643594, 8648579, 8653547, 8658499, 8663434, 8668354,
- 8673258, 8678145, 8683017, 8687874, 8692715, 8697540, 8702350,
- 8707145, 8711925, 8716690, 8721439, 8726174, 8730894, 8735599,
- 8740290, 8744967, 8749628, 8754276, 8758909, 8763528, 8768134,
- 8772725, 8777302, 8781865, 8786415, 8790951, 8795474, 8799983,
- 8804478, 8808961, 8813430, 8817886, 8822328, 8826758, 8831175,
- 8835579, 8839970, 8844349, 8848715, 8853068, 8857409, 8861737,
- 8866053, 8870357, 8874649, 8878928, 8883195, 8887451, 8891694,
- 8895926, 8900145, 8904353, 8908550, 8912734, 8916908, 8921069,
- 8925220, 8929358, 8933486, 8937603, 8941708, 8945802, 8949885,
- 8953957, 8958018, 8962068, 8966108, 8970137, 8974155, 8978162,
- 8982159, 8986145, 8990121, 8994086, 8998041, 9001986, 9005920,
- 9009844, 9013758, 9017662, 9021556, 9025440, 9029314, 9033178,
- 9037032, 9040877, 9044711, 9048536, 9052352, 9056157, 9059953,
- 9063740, 9067517, 9071285, 9075044, 9078793, 9082533, 9086263,
- 9089985, 9093697, 9097400, 9101095, 9104780, 9108456, 9112123,
- 9115782, 9119431, 9123072, 9126704, 9130328, 9133943, 9137549,
- 9141146, 9144735, 9148316, 9151888, 9155452, 9159007, 9162554,
- 9166092, 9169623, 9173145, 9176659, 9180165, 9183663, 9187152,
- 9190634, 9194108, 9197573, 9201031, 9204481, 9207923, 9211357,
- 9214784, 9218202, 9221613, 9225017, 9228412, 9231800, 9235181,
- 9238554, 9241919, 9245277, 9248628, 9251971, 9255307, 9258635,
- 9261956, 9265270, 9268577, 9271876, 9275169, 9278454, 9281732,
- 9285002, 9288266, 9291523, 9294773, 9298016, 9301252, 9304481,
- 9307703, 9310918, 9314126, 9317328, 9320523, 9323711, 9326892,
- 9330067, 9333235, 9336397, 9339552, 9342700, 9345842, 9348977,
- 9352106, 9355228, 9358344, 9361454, 9364557, 9367654, 9370744,
- 9373828, 9376906, 9379978, 9383043, 9386102, 9389155, 9392202,
- 9395243, 9398278, 9401306, 9404329, 9407345, 9410356, 9413360,
- 9416359, 9419351, 9422338, 9425319, 9428294, 9431263, 9434226,
- 9437184, 9440136, 9443082, 9446022, 9448957, 9451886, 9454809,
- 9457726, 9460638, 9463545, 9466446, 9469341, 9472231, 9475115,
- 9477994, 9480867, 9483735, 9486597, 9489454, 9492306, 9495152,
- 9497993, 9500828, 9503659, 9506484, 9509303, 9512118, 9514927,
- 9517731, 9520530, 9523324, 9526112, 9528895, 9531674, 9534447,
- 9537215, 9539978, 9542736, 9545489, 9548237, 9550980, 9553718,
- 9556451, 9559179, 9561903, 9564621, 9567335, 9570043, 9572747,
- 9575446, 9578140, 9580830, 9583514, 9586194, 9588869, 9591540,
- 9594205, 9596866, 9599523, 9602174, 9604821, 9607464, 9610101,
- 9612735, 9615363, 9617987, 9620607, 9623222, 9625832, 9628438,
- 9631040, 9633637, 9636229, 9638818, 9641401, 9643981, 9646556,
- 9649126, 9651692, 9654254, 9656812, 9659365, 9661914, 9664459,
- 9666999, 9669535, 9672067, 9674594, 9677118, 9679637, 9682152,
- 9684663, 9687169, 9689672, 9692170, 9694665, 9697155, 9699641,
- 9702123, 9704601, 9707075, 9709545, 9712010, 9714472, 9716930,
- 9719384, 9721834, 9724279, 9726721, 9729159, 9731593, 9734024,
- 9736450, 9738872, 9741291, 9743705, 9746116, 9748523, 9750926,
- 9753326, 9755721, 9758113, 9760501, 9762885, 9765266, 9767642,
- 9770015, 9772385, 9774750, 9777112, 9779470, 9781825, 9784175,
- 9786523, 9788866, 9791206, 9793543, 9795875, 9798204, 9800530,
- 9802852, 9805170, 9807485, 9809797, 9812104, 9814409, 9816710,
- 9819007, 9821301, 9823591, 9825878, 9828161, 9830441, 9832718,
- 9834991, 9837261, 9839527, 9841790, 9844050, 9846306, 9848559,
- 9850808, 9853054, 9855297, 9857537, 9859773, 9862006, 9864235,
- 9866462, 9868685, 9870904, 9873121, 9875334, 9877544, 9879751,
- 9881955, 9884155, 9886352, 9888546, 9890737, 9892925, 9895109,
- 9897291, 9899469, 9901644, 9903816, 9905985, 9908150, 9910313,
- 9912473, 9914629, 9916783, 9918933, 9921080, 9923225, 9925366,
- 9927504, 9929639, 9931771, 9933900, 9936027, 9938150, 9940270,
- 9942387, 9944502, 9946613, 9948721, 9950827, 9952929, 9955029,
- 9957126, 9959219, 9961310, 9963398, 9965484, 9967566, 9969645,
- 9971722, 9973796, 9975866, 9977934, 9980000, 9982062, 9984122,
- 9986179, 9988233, 9990284, 9992332, 9994378, 9996421, 9998461,
- 10000498, 10002533, 10004565, 10006594, 10008621, 10010644, 10012665,
- 10014684, 10016700, 10018713, 10020723, 10022731, 10024736, 10026738,
- 10028738, 10030735, 10032729, 10034721, 10036710, 10038697, 10040681,
- 10042662, 10044641, 10046617, 10048591, 10050562, 10052530, 10054496,
- 10056459, 10058420, 10060379, 10062334, 10064287, 10066238, 10068186,
- 10070132, 10072075, 10074016, 10075954, 10077890, 10079823, 10081754,
- 10083682, 10085608, 10087532, 10089453, 10091371, 10093287, 10095201,
- 10097112, 10099021, 10100928, 10102832, 10104733, 10106633, 10108529,
- 10110424, 10112316, 10114206, 10116093, 10117978, 10119861, 10121742,
- 10123620, 10125495, 10127369, 10129240, 10131109, 10132975, 10134839,
- 10136701, 10138561, 10140418, 10142273, 10144126, 10145976, 10147825,
- 10149671, 10151514, 10153356, 10155195, 10157032, 10158867, 10160699,
- 10162530, 10164358, 10166184, 10168007, 10169829, 10171648, 10173465,
- 10175280, 10177093, 10178904, 10180712, 10182519, 10184323, 10186125,
- 10187925, 10189722, 10191518, 10193311, 10195103, 10196892, 10198679,
- 10200464, 10202247, 10204028, 10205806, 10207583, 10209357, 10211130,
- 10212900, 10214668, 10216435, 10218199, 10219961, 10221721, 10223479,
- 10225235, 10226989, 10228741, 10230491, 10232239, 10233985, 10235728,
- 10237470, 10239210, 10240948, 10242684, 10244417, 10246149, 10247879,
- 10249607, 10251333, 10253057, 10254779, 10256499, 10258217, 10259933,
- 10261647, 10263360, 10265070, 10266778, 10268485, 10270189, 10271892,
- 10273593, 10275292, 10276988, 10278683, 10280376, 10282068, 10283757,
- 10285444, 10287130, 10288814, 10290495, 10292175, 10293853, 10295530,
- 10297204, 10298876, 10300547, 10302216, 10303883, 10305548, 10307211,
- 10308873, 10310532, 10312190, 10313846, 10315501, 10317153, 10318804,
- 10320452, 10322099, 10323745, 10325388, 10327030, 10328670, 10330308,
- 10331944, 10333578, 10335211, 10336842, 10338472, 10340099, 10341725,
- 10343349, 10344971, 10346592, 10348210, 10349828, 10351443, 10353057,
- 10354668, 10356279, 10357887, 10359494, 10361099, 10362702, 10364304,
- 10365904, 10367502, 10369099, 10370694, 10372287, 10373879, 10375468,
- 10377057, 10378643, 10380228, 10381811, 10383393, 10384973, 10386551,
- 10388128, 10389703, 10391276, 10392848, 10394418, 10395986, 10397553,
- 10399118, 10400682, 10402244, 10403804, 10405363, 10406920, 10408476,
- 10410030, 10411582, 10413133, 10414682, 10416230, 10417776, 10419320,
- 10420863, 10422404, 10423944, 10425482, 10427019, 10428554, 10430087,
- 10431619, 10433149, 10434678, 10436206, 10437731, 10439256, 10440778,
- 10442299, 10443819, 10445337, 10446854, 10448369, 10449882, 10451394,
- 10452905, 10454414, 10455921, 10457427, 10458932, 10460435, 10461936,
- 10463436, 10464935, 10466432, 10467927, 10469422, 10470914, 10472405,
- 10473895, 10475383, 10476870, 10478355, 10479839, 10481322, 10482802,
- 10484282,
-};
-
-#define LOG2_PRECISION 20
-static int64_t log2_approximation(int64_t v) {
- assert(v > 0);
- if (v < LOG2_TABLE_SIZE) {
- return log2_table[v];
- } else {
- // use linear approximation when v >= 2^10
- const int slope =
- 1477; // slope = 1 / (log(2) * 1024) * (1 << LOG2_PRECISION)
- assert(LOG2_TABLE_SIZE == 1 << 10);
-
- return slope * (v - LOG2_TABLE_SIZE) + (10 << LOG2_PRECISION);
- }
-}
-
-int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
- int mv_num) {
- int i;
- int update = 0;
- int64_t best_cost = 0;
- vpx_clear_system_state();
- for (i = 0; i < mv_num; ++i) {
- if (nb_mvs[i].as_int != INVALID_MV) {
- MV nb_mv = nb_mvs[i].as_mv;
- const int64_t row_diff = abs(mv->row - nb_mv.row);
- const int64_t col_diff = abs(mv->col - nb_mv.col);
- const int64_t cost =
- log2_approximation(1 + row_diff * row_diff + col_diff * col_diff);
- if (update == 0) {
- best_cost = cost;
- update = 1;
- } else {
- best_cost = cost < best_cost ? cost : best_cost;
+static int64_t exhaustive_mesh_search_multi_step(
+ MV *best_mv, const MV *center_mv, int range, int step,
+ const struct buf_2d *src, const struct buf_2d *pre, int lambda,
+ const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits,
+ const vp9_variance_fn_ptr_t *fn_ptr) {
+ int64_t best_sad;
+ int r, c;
+ int start_col, end_col, start_row, end_row;
+ *best_mv = *center_mv;
+ best_sad =
+ ((int64_t)fn_ptr->sdf(src->buf, src->stride,
+ get_buf_from_mv(pre, center_mv), pre->stride)
+ << LOG2_PRECISION) +
+ lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
+ start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
+ start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
+ end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
+ end_col = VPXMIN(center_mv->col + range, mv_limits->col_max);
+ for (r = start_row; r <= end_row; r += step) {
+ for (c = start_col; c <= end_col; c += step) {
+ const MV mv = { r, c };
+ int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
+ get_buf_from_mv(pre, &mv), pre->stride)
+ << LOG2_PRECISION;
+ if (sad < best_sad) {
+ sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
}
}
}
- return best_cost;
+ return best_sad;
}
-static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
- int range, int step,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv, int lambda,
- const int_mv *nb_full_mvs,
- int full_mv_num) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct buf_2d *const what = &x->plane[0].src;
- const struct buf_2d *const in_what = &xd->plane[0].pre[0];
- MV fcenter_mv = { center_mv->row, center_mv->col };
+static int64_t exhaustive_mesh_search_single_step(
+ MV *best_mv, const MV *center_mv, int range, const struct buf_2d *src,
+ const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs,
+ int full_mv_num, const MvLimits *mv_limits,
+ const vp9_variance_fn_ptr_t *fn_ptr) {
int64_t best_sad;
int r, c, i;
int start_col, end_col, start_row, end_row;
- int col_step = (step > 1) ? step : 4;
- assert(step >= 1);
-
- clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
- x->mv_limits.row_min, x->mv_limits.row_max);
- *best_mv = fcenter_mv;
+ *best_mv = *center_mv;
best_sad =
- ((int64_t)fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &fcenter_mv),
- in_what->stride)
+ ((int64_t)fn_ptr->sdf(src->buf, src->stride,
+ get_buf_from_mv(pre, center_mv), pre->stride)
<< LOG2_PRECISION) +
- lambda * vp9_nb_mvs_inconsistency(&fcenter_mv, nb_full_mvs, full_mv_num);
- start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
- start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
- end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
- end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
+ lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
+ start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
+ start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
+ end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
+ end_col = VPXMIN(center_mv->col + range, mv_limits->col_max);
+ for (r = start_row; r <= end_row; r += 1) {
+ c = start_col;
+ // sdx8f may not be available some block size
+ if (fn_ptr->sdx8f) {
+ while (c + 7 <= end_col) {
+ unsigned int sads[8];
+ const MV mv = { r, c };
+ const uint8_t *buf = get_buf_from_mv(pre, &mv);
+ fn_ptr->sdx8f(src->buf, src->stride, buf, pre->stride, sads);
+
+ for (i = 0; i < 8; ++i) {
+ int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
+ if (sad < best_sad) {
+ const MV mv = { r, c + i };
+ sad += lambda *
+ vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
+ }
+ }
+ c += 8;
+ }
+ }
+ while (c + 3 <= end_col) {
+ unsigned int sads[4];
+ const uint8_t *addrs[4];
+ for (i = 0; i < 4; ++i) {
+ const MV mv = { r, c + i };
+ addrs[i] = get_buf_from_mv(pre, &mv);
+ }
+ fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads);
- for (r = start_row; r <= end_row; r += step) {
- for (c = start_col; c <= end_col; c += col_step) {
- // Step > 1 means we are not checking every location in this pass.
- if (step > 1) {
- const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
- int64_t sad =
- (int64_t)fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride)
- << LOG2_PRECISION;
+ for (i = 0; i < 4; ++i) {
+ int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
if (sad < best_sad) {
+ const MV mv = { r, c + i };
sad +=
lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
if (sad < best_sad) {
@@ -1971,53 +1831,48 @@ static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
*best_mv = mv;
}
}
- } else {
- // 4 sads in a single call if we are checking every location
- if (c + 3 <= end_col) {
- unsigned int sads[4];
- const uint8_t *addrs[4];
- for (i = 0; i < 4; ++i) {
- const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
- addrs[i] = get_buf_from_mv(in_what, &mv);
- }
- fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
-
- for (i = 0; i < 4; ++i) {
- int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
- if (sad < best_sad) {
- const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
- sad += lambda *
- vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
- if (sad < best_sad) {
- best_sad = sad;
- *best_mv = mv;
- }
- }
- }
- } else {
- for (i = 0; i < end_col - c; ++i) {
- const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
- int64_t sad = (int64_t)fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv),
- in_what->stride)
- << LOG2_PRECISION;
- if (sad < best_sad) {
- sad += lambda *
- vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
- if (sad < best_sad) {
- best_sad = sad;
- *best_mv = mv;
- }
- }
- }
+ }
+ c += 4;
+ }
+ while (c <= end_col) {
+ const MV mv = { r, c };
+ int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
+ get_buf_from_mv(pre, &mv), pre->stride)
+ << LOG2_PRECISION;
+ if (sad < best_sad) {
+ sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
}
}
+ c += 1;
}
}
-
return best_sad;
}
+static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
+ int range, int step,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv, int lambda,
+ const int_mv *nb_full_mvs,
+ int full_mv_num) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *src = &x->plane[0].src;
+ const struct buf_2d *pre = &xd->plane[0].pre[0];
+ assert(step >= 1);
+ assert(is_mv_in(&x->mv_limits, center_mv));
+ if (step == 1) {
+ return exhaustive_mesh_search_single_step(
+ best_mv, center_mv, range, src, pre, lambda, nb_full_mvs, full_mv_num,
+ &x->mv_limits, fn_ptr);
+ }
+ return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step, src,
+ pre, lambda, nb_full_mvs,
+ full_mv_num, &x->mv_limits, fn_ptr);
+}
+
static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
MV *centre_mv_full,
const vp9_variance_fn_ptr_t *fn_ptr,
@@ -2031,7 +1886,6 @@ static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
int interval = sf->mesh_patterns[0].interval;
int range = sf->mesh_patterns[0].range;
int baseline_interval_divisor;
- const MV dummy_mv = { 0, 0 };
// Trap illegal values for interval and range for this function.
if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
@@ -2067,19 +1921,18 @@ static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
}
}
- bestsme = vp9_get_mvpred_var(x, &temp_mv, &dummy_mv, fn_ptr, 0);
*dst_mv = temp_mv;
return bestsme;
}
-static double diamond_search_sad_new(const MACROBLOCK *x,
- const search_site_config *cfg,
- const MV *init_full_mv, MV *best_full_mv,
- int search_param, int lambda, int *num00,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const int_mv *nb_full_mvs,
- int full_mv_num) {
+static int64_t diamond_search_sad_new(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ const MV *init_full_mv, MV *best_full_mv,
+ int search_param, int lambda, int *num00,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs,
+ int full_mv_num) {
int i, j, step;
const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2089,7 +1942,7 @@ static double diamond_search_sad_new(const MACROBLOCK *x,
const int in_what_stride = xd->plane[0].pre[0].stride;
const uint8_t *best_address;
- double bestsad;
+ int64_t bestsad;
int best_site = -1;
int last_site = -1;
@@ -2116,11 +1969,11 @@ static double diamond_search_sad_new(const MACROBLOCK *x,
// Check the starting position
{
- const double mv_dist =
- fn_ptr->sdf(what, what_stride, in_what, in_what_stride);
- const double mv_cost =
- vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
+ const int64_t mv_dist =
+ (int64_t)fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ << LOG2_PRECISION;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
bestsad = mv_dist + lambda * mv_cost;
}
@@ -2151,14 +2004,13 @@ static double diamond_search_sad_new(const MACROBLOCK *x,
sad_array);
for (t = 0; t < 4; t++, i++) {
- if (sad_array[t] < bestsad) {
+ const int64_t mv_dist = (int64_t)sad_array[t] << LOG2_PRECISION;
+ if (mv_dist < bestsad) {
const MV this_mv = { best_full_mv->row + ss_mv[i].row,
best_full_mv->col + ss_mv[i].col };
- const double mv_dist = sad_array[t];
- const double mv_cost =
- vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
- double thissad = mv_dist + lambda * mv_cost;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
+ const int64_t thissad = mv_dist + lambda * mv_cost;
if (thissad < bestsad) {
bestsad = thissad;
best_site = i;
@@ -2174,13 +2026,14 @@ static double diamond_search_sad_new(const MACROBLOCK *x,
if (is_mv_in(&x->mv_limits, &this_mv)) {
const uint8_t *const check_here = ss_os[i] + best_address;
- const double mv_dist =
- fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
+ const int64_t mv_dist =
+ (int64_t)fn_ptr->sdf(what, what_stride, check_here,
+ in_what_stride)
+ << LOG2_PRECISION;
if (mv_dist < bestsad) {
- const double mv_cost =
- vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
- double thissad = mv_dist + lambda * mv_cost;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
+ const int64_t thissad = mv_dist + lambda * mv_cost;
if (thissad < bestsad) {
bestsad = thissad;
best_site = i;
@@ -2202,32 +2055,30 @@ static double diamond_search_sad_new(const MACROBLOCK *x,
return bestsad;
}
-void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
- int mi_col, int rf_idx, BLOCK_SIZE bsize,
- int_mv *nb_full_mvs) {
- const int mi_width = num_8x8_blocks_wide_lookup[bsize];
- const int mi_height = num_8x8_blocks_high_lookup[bsize];
+int vp9_prepare_nb_full_mvs(const MotionField *motion_field, int mi_row,
+ int mi_col, int_mv *nb_full_mvs) {
+ const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize];
const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+ int nb_full_mv_num = 0;
int i;
+ assert(mi_row % mi_height == 0);
+ assert(mi_col % mi_width == 0);
for (i = 0; i < NB_MVS_NUM; ++i) {
- int r = dirs[i][0] * mi_height;
- int c = dirs[i][1] * mi_width;
- if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
- mi_col + c < tpl_frame->mi_cols) {
- const TplDepStats *tpl_ptr =
- &tpl_frame
- ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];
- int_mv *mv =
- get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);
- if (tpl_ptr->ready[rf_idx]) {
- nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv);
- } else {
- nb_full_mvs[i].as_int = INVALID_MV;
+ int r = dirs[i][0];
+ int c = dirs[i][1];
+ int brow = mi_row / mi_height + r;
+ int bcol = mi_col / mi_width + c;
+ if (brow >= 0 && brow < motion_field->block_rows && bcol >= 0 &&
+ bcol < motion_field->block_cols) {
+ if (vp9_motion_field_is_mv_set(motion_field, brow, bcol)) {
+ int_mv mv = vp9_motion_field_get_mv(motion_field, brow, bcol);
+ nb_full_mvs[nb_full_mv_num].as_mv = get_full_mv(&mv.as_mv);
+ ++nb_full_mv_num;
}
- } else {
- nb_full_mvs[i].as_int = INVALID_MV;
}
}
+ return nb_full_mv_num;
}
#endif // CONFIG_NON_GREEDY_MV
@@ -2585,26 +2436,32 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
return best_sad;
}
+static int get_exhaustive_threshold(int exhaustive_searches_thresh,
+ BLOCK_SIZE bsize) {
+ return exhaustive_searches_thresh >>
+ (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
+}
+
#if CONFIG_NON_GREEDY_MV
// Runs sequence of diamond searches in smaller steps for RD.
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
point as the best match, we will do a final 1-away diamond
refining search */
-double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param, int lambda,
- int do_refine,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const int_mv *nb_full_mvs, int full_mv_num,
- MV *best_mv) {
+int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+ int lambda, int do_refine,
+ const int_mv *nb_full_mvs, int full_mv_num,
+ MV *best_mv) {
+ const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
+ const SPEED_FEATURES *const sf = &cpi->sf;
int n, num00 = 0;
- double thissme;
- double bestsme;
+ int thissme;
+ int bestsme;
const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param;
const MV center_mv = { 0, 0 };
vpx_clear_system_state();
- bestsme =
- diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param,
- lambda, &n, fn_ptr, nb_full_mvs, full_mv_num);
+ diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, lambda,
+ &n, fn_ptr, nb_full_mvs, full_mv_num);
bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
@@ -2618,9 +2475,9 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
num00--;
} else {
MV temp_mv;
- thissme = diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
- step_param + n, lambda, &num00, fn_ptr,
- nb_full_mvs, full_mv_num);
+ diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
+ step_param + n, lambda, &num00, fn_ptr,
+ nb_full_mvs, full_mv_num);
thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
// check to see if refining search is needed.
if (num00 > further_steps - n) do_refine = 0;
@@ -2636,8 +2493,8 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
if (do_refine) {
const int search_range = 8;
MV temp_mv = *best_mv;
- thissme = vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range,
- fn_ptr, nb_full_mvs, full_mv_num);
+ vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range, fn_ptr,
+ nb_full_mvs, full_mv_num);
thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
if (thissme < bestsme) {
bestsme = thissme;
@@ -2645,8 +2502,16 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
}
}
- bestsme = (double)full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv,
- lambda, nb_full_mvs, full_mv_num);
+ if (sf->exhaustive_searches_thresh < INT_MAX &&
+ !cpi->rc.is_src_frame_alt_ref) {
+ const int64_t exhaustive_thr =
+ get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
+ if (bestsme > exhaustive_thr) {
+ full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda,
+ nb_full_mvs, full_mv_num);
+ bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
+ }
+ }
return bestsme;
}
#endif // CONFIG_NON_GREEDY_MV
@@ -2774,24 +2639,25 @@ static int full_pixel_exhaustive(const VP9_COMP *const cpi,
}
#if CONFIG_NON_GREEDY_MV
-double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
- int lambda, int search_range,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const int_mv *nb_full_mvs, int full_mv_num) {
+int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
+ int lambda, int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs,
+ int full_mv_num) {
const MACROBLOCKD *const xd = &x->e_mbd;
const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv);
- double best_sad;
+ int64_t best_sad;
int i, j;
vpx_clear_system_state();
{
- const double mv_dist =
- fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride);
- const double mv_cost =
- vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
+ const int64_t mv_dist = (int64_t)fn_ptr->sdf(what->buf, what->stride,
+ best_address, in_what->stride)
+ << LOG2_PRECISION;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
best_sad = mv_dist + lambda * mv_cost;
}
@@ -2813,11 +2679,10 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
for (j = 0; j < 4; ++j) {
const MV mv = { best_full_mv->row + neighbors[j].row,
best_full_mv->col + neighbors[j].col };
- const double mv_dist = sads[j];
- const double mv_cost =
- vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
- const double thissad = mv_dist + lambda * mv_cost;
+ const int64_t mv_dist = (int64_t)sads[j] << LOG2_PRECISION;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ const int64_t thissad = mv_dist + lambda * mv_cost;
if (thissad < best_sad) {
best_sad = thissad;
best_site = j;
@@ -2829,13 +2694,14 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
best_full_mv->col + neighbors[j].col };
if (is_mv_in(&x->mv_limits, &mv)) {
- const double mv_dist =
- fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride);
- const double mv_cost =
- vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
- const double thissad = mv_dist + lambda * mv_cost;
+ const int64_t mv_dist =
+ (int64_t)fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv),
+ in_what->stride)
+ << LOG2_PRECISION;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ const int64_t thissad = mv_dist + lambda * mv_cost;
if (thissad < best_sad) {
best_sad = thissad;
best_site = j;
@@ -3034,9 +2900,10 @@ int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x,
if (sf->exhaustive_searches_thresh < INT_MAX &&
!cpi->rc.is_src_frame_alt_ref) {
const int64_t exhaustive_thr =
- sf->exhaustive_searches_thresh >>
- (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
- if (var > exhaustive_thr) run_exhaustive_search = 1;
+ get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
+ if (var > exhaustive_thr) {
+ run_exhaustive_search = 1;
+ }
}
} else if (method == MESH) {
run_exhaustive_search = 1;
diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h
index cafa2d150..0c4d8f23c 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/libvpx/vp9/encoder/vp9_mcomp.h
@@ -12,6 +12,9 @@
#define VPX_VP9_ENCODER_VP9_MCOMP_H_
#include "vp9/encoder/vp9_block.h"
+#if CONFIG_NON_GREEDY_MV
+#include "vp9/encoder/vp9_non_greedy_mv.h"
+#endif // CONFIG_NON_GREEDY_MV
#include "vpx_dsp/variance.h"
#ifdef __cplusplus
@@ -126,22 +129,18 @@ void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
const MV *ref_mv);
#if CONFIG_NON_GREEDY_MV
-#define NB_MVS_NUM 4
struct TplDepStats;
-double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
- int lambda, int search_range,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const int_mv *nb_full_mvs, int full_mv_num);
-
-double vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param, int lambda,
- int do_refine,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const int_mv *nb_full_mvs, int full_mv_num,
- MV *best_mv);
-
-int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
- int mv_num);
+int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
+ int lambda, int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, int full_mv_num);
+
+int vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+ int lambda, int do_refine,
+ const int_mv *nb_full_mvs, int full_mv_num,
+ MV *best_mv);
+
static INLINE MV get_full_mv(const MV *mv) {
MV out_mv;
out_mv.row = mv->row >> 3;
@@ -149,9 +148,8 @@ static INLINE MV get_full_mv(const MV *mv) {
return out_mv;
}
struct TplDepFrame;
-void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,
- int mi_col, int rf_idx, BLOCK_SIZE bsize,
- int_mv *nb_full_mvs);
+int vp9_prepare_nb_full_mvs(const struct MotionField *motion_field, int mi_row,
+ int mi_col, int_mv *nb_full_mvs);
static INLINE BLOCK_SIZE get_square_block_size(BLOCK_SIZE bsize) {
BLOCK_SIZE square_bsize;
diff --git a/libvpx/vp9/encoder/vp9_non_greedy_mv.c b/libvpx/vp9/encoder/vp9_non_greedy_mv.c
new file mode 100644
index 000000000..4679d6c49
--- /dev/null
+++ b/libvpx/vp9/encoder/vp9_non_greedy_mv.c
@@ -0,0 +1,533 @@
+/*
+ * Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_mv.h"
+#include "vp9/encoder/vp9_non_greedy_mv.h"
+// TODO(angiebird): move non_greedy_mv related functions to this file
+
+#define LOG2_TABLE_SIZE 1024
+static const int log2_table[LOG2_TABLE_SIZE] = {
+ 0, // This is a dummy value
+ 0, 1048576, 1661954, 2097152, 2434718, 2710530, 2943725,
+ 3145728, 3323907, 3483294, 3627477, 3759106, 3880192, 3992301,
+ 4096672, 4194304, 4286015, 4372483, 4454275, 4531870, 4605679,
+ 4676053, 4743299, 4807682, 4869436, 4928768, 4985861, 5040877,
+ 5093962, 5145248, 5194851, 5242880, 5289431, 5334591, 5378443,
+ 5421059, 5462508, 5502851, 5542146, 5580446, 5617800, 5654255,
+ 5689851, 5724629, 5758625, 5791875, 5824409, 5856258, 5887450,
+ 5918012, 5947969, 5977344, 6006160, 6034437, 6062195, 6089453,
+ 6116228, 6142538, 6168398, 6193824, 6218829, 6243427, 6267632,
+ 6291456, 6314910, 6338007, 6360756, 6383167, 6405252, 6427019,
+ 6448477, 6469635, 6490501, 6511084, 6531390, 6551427, 6571202,
+ 6590722, 6609993, 6629022, 6647815, 6666376, 6684713, 6702831,
+ 6720734, 6738427, 6755916, 6773205, 6790299, 6807201, 6823917,
+ 6840451, 6856805, 6872985, 6888993, 6904834, 6920510, 6936026,
+ 6951384, 6966588, 6981641, 6996545, 7011304, 7025920, 7040397,
+ 7054736, 7068940, 7083013, 7096956, 7110771, 7124461, 7138029,
+ 7151476, 7164804, 7178017, 7191114, 7204100, 7216974, 7229740,
+ 7242400, 7254954, 7267405, 7279754, 7292003, 7304154, 7316208,
+ 7328167, 7340032, 7351805, 7363486, 7375079, 7386583, 7398000,
+ 7409332, 7420579, 7431743, 7442826, 7453828, 7464751, 7475595,
+ 7486362, 7497053, 7507669, 7518211, 7528680, 7539077, 7549404,
+ 7559660, 7569847, 7579966, 7590017, 7600003, 7609923, 7619778,
+ 7629569, 7639298, 7648964, 7658569, 7668114, 7677598, 7687023,
+ 7696391, 7705700, 7714952, 7724149, 7733289, 7742375, 7751407,
+ 7760385, 7769310, 7778182, 7787003, 7795773, 7804492, 7813161,
+ 7821781, 7830352, 7838875, 7847350, 7855777, 7864158, 7872493,
+ 7880782, 7889027, 7897226, 7905381, 7913492, 7921561, 7929586,
+ 7937569, 7945510, 7953410, 7961268, 7969086, 7976864, 7984602,
+ 7992301, 7999960, 8007581, 8015164, 8022709, 8030217, 8037687,
+ 8045121, 8052519, 8059880, 8067206, 8074496, 8081752, 8088973,
+ 8096159, 8103312, 8110431, 8117516, 8124569, 8131589, 8138576,
+ 8145532, 8152455, 8159347, 8166208, 8173037, 8179836, 8186605,
+ 8193343, 8200052, 8206731, 8213380, 8220001, 8226593, 8233156,
+ 8239690, 8246197, 8252676, 8259127, 8265550, 8271947, 8278316,
+ 8284659, 8290976, 8297266, 8303530, 8309768, 8315981, 8322168,
+ 8328330, 8334467, 8340579, 8346667, 8352730, 8358769, 8364784,
+ 8370775, 8376743, 8382687, 8388608, 8394506, 8400381, 8406233,
+ 8412062, 8417870, 8423655, 8429418, 8435159, 8440878, 8446576,
+ 8452252, 8457908, 8463542, 8469155, 8474748, 8480319, 8485871,
+ 8491402, 8496913, 8502404, 8507875, 8513327, 8518759, 8524171,
+ 8529564, 8534938, 8540293, 8545629, 8550947, 8556245, 8561525,
+ 8566787, 8572031, 8577256, 8582464, 8587653, 8592825, 8597980,
+ 8603116, 8608236, 8613338, 8618423, 8623491, 8628542, 8633576,
+ 8638593, 8643594, 8648579, 8653547, 8658499, 8663434, 8668354,
+ 8673258, 8678145, 8683017, 8687874, 8692715, 8697540, 8702350,
+ 8707145, 8711925, 8716690, 8721439, 8726174, 8730894, 8735599,
+ 8740290, 8744967, 8749628, 8754276, 8758909, 8763528, 8768134,
+ 8772725, 8777302, 8781865, 8786415, 8790951, 8795474, 8799983,
+ 8804478, 8808961, 8813430, 8817886, 8822328, 8826758, 8831175,
+ 8835579, 8839970, 8844349, 8848715, 8853068, 8857409, 8861737,
+ 8866053, 8870357, 8874649, 8878928, 8883195, 8887451, 8891694,
+ 8895926, 8900145, 8904353, 8908550, 8912734, 8916908, 8921069,
+ 8925220, 8929358, 8933486, 8937603, 8941708, 8945802, 8949885,
+ 8953957, 8958018, 8962068, 8966108, 8970137, 8974155, 8978162,
+ 8982159, 8986145, 8990121, 8994086, 8998041, 9001986, 9005920,
+ 9009844, 9013758, 9017662, 9021556, 9025440, 9029314, 9033178,
+ 9037032, 9040877, 9044711, 9048536, 9052352, 9056157, 9059953,
+ 9063740, 9067517, 9071285, 9075044, 9078793, 9082533, 9086263,
+ 9089985, 9093697, 9097400, 9101095, 9104780, 9108456, 9112123,
+ 9115782, 9119431, 9123072, 9126704, 9130328, 9133943, 9137549,
+ 9141146, 9144735, 9148316, 9151888, 9155452, 9159007, 9162554,
+ 9166092, 9169623, 9173145, 9176659, 9180165, 9183663, 9187152,
+ 9190634, 9194108, 9197573, 9201031, 9204481, 9207923, 9211357,
+ 9214784, 9218202, 9221613, 9225017, 9228412, 9231800, 9235181,
+ 9238554, 9241919, 9245277, 9248628, 9251971, 9255307, 9258635,
+ 9261956, 9265270, 9268577, 9271876, 9275169, 9278454, 9281732,
+ 9285002, 9288266, 9291523, 9294773, 9298016, 9301252, 9304481,
+ 9307703, 9310918, 9314126, 9317328, 9320523, 9323711, 9326892,
+ 9330067, 9333235, 9336397, 9339552, 9342700, 9345842, 9348977,
+ 9352106, 9355228, 9358344, 9361454, 9364557, 9367654, 9370744,
+ 9373828, 9376906, 9379978, 9383043, 9386102, 9389155, 9392202,
+ 9395243, 9398278, 9401306, 9404329, 9407345, 9410356, 9413360,
+ 9416359, 9419351, 9422338, 9425319, 9428294, 9431263, 9434226,
+ 9437184, 9440136, 9443082, 9446022, 9448957, 9451886, 9454809,
+ 9457726, 9460638, 9463545, 9466446, 9469341, 9472231, 9475115,
+ 9477994, 9480867, 9483735, 9486597, 9489454, 9492306, 9495152,
+ 9497993, 9500828, 9503659, 9506484, 9509303, 9512118, 9514927,
+ 9517731, 9520530, 9523324, 9526112, 9528895, 9531674, 9534447,
+ 9537215, 9539978, 9542736, 9545489, 9548237, 9550980, 9553718,
+ 9556451, 9559179, 9561903, 9564621, 9567335, 9570043, 9572747,
+ 9575446, 9578140, 9580830, 9583514, 9586194, 9588869, 9591540,
+ 9594205, 9596866, 9599523, 9602174, 9604821, 9607464, 9610101,
+ 9612735, 9615363, 9617987, 9620607, 9623222, 9625832, 9628438,
+ 9631040, 9633637, 9636229, 9638818, 9641401, 9643981, 9646556,
+ 9649126, 9651692, 9654254, 9656812, 9659365, 9661914, 9664459,
+ 9666999, 9669535, 9672067, 9674594, 9677118, 9679637, 9682152,
+ 9684663, 9687169, 9689672, 9692170, 9694665, 9697155, 9699641,
+ 9702123, 9704601, 9707075, 9709545, 9712010, 9714472, 9716930,
+ 9719384, 9721834, 9724279, 9726721, 9729159, 9731593, 9734024,
+ 9736450, 9738872, 9741291, 9743705, 9746116, 9748523, 9750926,
+ 9753326, 9755721, 9758113, 9760501, 9762885, 9765266, 9767642,
+ 9770015, 9772385, 9774750, 9777112, 9779470, 9781825, 9784175,
+ 9786523, 9788866, 9791206, 9793543, 9795875, 9798204, 9800530,
+ 9802852, 9805170, 9807485, 9809797, 9812104, 9814409, 9816710,
+ 9819007, 9821301, 9823591, 9825878, 9828161, 9830441, 9832718,
+ 9834991, 9837261, 9839527, 9841790, 9844050, 9846306, 9848559,
+ 9850808, 9853054, 9855297, 9857537, 9859773, 9862006, 9864235,
+ 9866462, 9868685, 9870904, 9873121, 9875334, 9877544, 9879751,
+ 9881955, 9884155, 9886352, 9888546, 9890737, 9892925, 9895109,
+ 9897291, 9899469, 9901644, 9903816, 9905985, 9908150, 9910313,
+ 9912473, 9914629, 9916783, 9918933, 9921080, 9923225, 9925366,
+ 9927504, 9929639, 9931771, 9933900, 9936027, 9938150, 9940270,
+ 9942387, 9944502, 9946613, 9948721, 9950827, 9952929, 9955029,
+ 9957126, 9959219, 9961310, 9963398, 9965484, 9967566, 9969645,
+ 9971722, 9973796, 9975866, 9977934, 9980000, 9982062, 9984122,
+ 9986179, 9988233, 9990284, 9992332, 9994378, 9996421, 9998461,
+ 10000498, 10002533, 10004565, 10006594, 10008621, 10010644, 10012665,
+ 10014684, 10016700, 10018713, 10020723, 10022731, 10024736, 10026738,
+ 10028738, 10030735, 10032729, 10034721, 10036710, 10038697, 10040681,
+ 10042662, 10044641, 10046617, 10048591, 10050562, 10052530, 10054496,
+ 10056459, 10058420, 10060379, 10062334, 10064287, 10066238, 10068186,
+ 10070132, 10072075, 10074016, 10075954, 10077890, 10079823, 10081754,
+ 10083682, 10085608, 10087532, 10089453, 10091371, 10093287, 10095201,
+ 10097112, 10099021, 10100928, 10102832, 10104733, 10106633, 10108529,
+ 10110424, 10112316, 10114206, 10116093, 10117978, 10119861, 10121742,
+ 10123620, 10125495, 10127369, 10129240, 10131109, 10132975, 10134839,
+ 10136701, 10138561, 10140418, 10142273, 10144126, 10145976, 10147825,
+ 10149671, 10151514, 10153356, 10155195, 10157032, 10158867, 10160699,
+ 10162530, 10164358, 10166184, 10168007, 10169829, 10171648, 10173465,
+ 10175280, 10177093, 10178904, 10180712, 10182519, 10184323, 10186125,
+ 10187925, 10189722, 10191518, 10193311, 10195103, 10196892, 10198679,
+ 10200464, 10202247, 10204028, 10205806, 10207583, 10209357, 10211130,
+ 10212900, 10214668, 10216435, 10218199, 10219961, 10221721, 10223479,
+ 10225235, 10226989, 10228741, 10230491, 10232239, 10233985, 10235728,
+ 10237470, 10239210, 10240948, 10242684, 10244417, 10246149, 10247879,
+ 10249607, 10251333, 10253057, 10254779, 10256499, 10258217, 10259933,
+ 10261647, 10263360, 10265070, 10266778, 10268485, 10270189, 10271892,
+ 10273593, 10275292, 10276988, 10278683, 10280376, 10282068, 10283757,
+ 10285444, 10287130, 10288814, 10290495, 10292175, 10293853, 10295530,
+ 10297204, 10298876, 10300547, 10302216, 10303883, 10305548, 10307211,
+ 10308873, 10310532, 10312190, 10313846, 10315501, 10317153, 10318804,
+ 10320452, 10322099, 10323745, 10325388, 10327030, 10328670, 10330308,
+ 10331944, 10333578, 10335211, 10336842, 10338472, 10340099, 10341725,
+ 10343349, 10344971, 10346592, 10348210, 10349828, 10351443, 10353057,
+ 10354668, 10356279, 10357887, 10359494, 10361099, 10362702, 10364304,
+ 10365904, 10367502, 10369099, 10370694, 10372287, 10373879, 10375468,
+ 10377057, 10378643, 10380228, 10381811, 10383393, 10384973, 10386551,
+ 10388128, 10389703, 10391276, 10392848, 10394418, 10395986, 10397553,
+ 10399118, 10400682, 10402244, 10403804, 10405363, 10406920, 10408476,
+ 10410030, 10411582, 10413133, 10414682, 10416230, 10417776, 10419320,
+ 10420863, 10422404, 10423944, 10425482, 10427019, 10428554, 10430087,
+ 10431619, 10433149, 10434678, 10436206, 10437731, 10439256, 10440778,
+ 10442299, 10443819, 10445337, 10446854, 10448369, 10449882, 10451394,
+ 10452905, 10454414, 10455921, 10457427, 10458932, 10460435, 10461936,
+ 10463436, 10464935, 10466432, 10467927, 10469422, 10470914, 10472405,
+ 10473895, 10475383, 10476870, 10478355, 10479839, 10481322, 10482802,
+ 10484282,
+};
+
// Number of motion-field blocks needed to cover mi_num 8x8 units when each
// block spans mi_bsize units: a ceiling division counting any partial block.
static int mi_size_to_block_size(int mi_bsize, int mi_num) {
  return (mi_num + mi_bsize - 1) / mi_bsize;
}
+
+Status vp9_alloc_motion_field_info(MotionFieldInfo *motion_field_info,
+ int frame_num, int mi_rows, int mi_cols) {
+ int frame_idx, rf_idx, square_block_idx;
+ if (motion_field_info->allocated) {
+ // TODO(angiebird): Avoid re-allocate buffer if possible
+ vp9_free_motion_field_info(motion_field_info);
+ }
+ motion_field_info->frame_num = frame_num;
+ motion_field_info->motion_field_array =
+ vpx_calloc(frame_num, sizeof(*motion_field_info->motion_field_array));
+ for (frame_idx = 0; frame_idx < frame_num; ++frame_idx) {
+ for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
+ for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
+ ++square_block_idx) {
+ BLOCK_SIZE bsize = square_block_idx_to_bsize(square_block_idx);
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int block_rows = mi_size_to_block_size(mi_height, mi_rows);
+ const int block_cols = mi_size_to_block_size(mi_width, mi_cols);
+ MotionField *motion_field =
+ &motion_field_info
+ ->motion_field_array[frame_idx][rf_idx][square_block_idx];
+ Status status =
+ vp9_alloc_motion_field(motion_field, bsize, block_rows, block_cols);
+ if (status == STATUS_FAILED) {
+ return STATUS_FAILED;
+ }
+ }
+ }
+ }
+ motion_field_info->allocated = 1;
+ return STATUS_OK;
+}
+
+Status vp9_alloc_motion_field(MotionField *motion_field, BLOCK_SIZE bsize,
+ int block_rows, int block_cols) {
+ Status status = STATUS_OK;
+ motion_field->ready = 0;
+ motion_field->bsize = bsize;
+ motion_field->block_rows = block_rows;
+ motion_field->block_cols = block_cols;
+ motion_field->block_num = block_rows * block_cols;
+ motion_field->mf =
+ vpx_calloc(motion_field->block_num, sizeof(*motion_field->mf));
+ if (motion_field->mf == NULL) {
+ status = STATUS_FAILED;
+ }
+ motion_field->set_mv =
+ vpx_calloc(motion_field->block_num, sizeof(*motion_field->set_mv));
+ if (motion_field->set_mv == NULL) {
+ vpx_free(motion_field->mf);
+ motion_field->mf = NULL;
+ status = STATUS_FAILED;
+ }
+ motion_field->local_structure = vpx_calloc(
+ motion_field->block_num, sizeof(*motion_field->local_structure));
+ if (motion_field->local_structure == NULL) {
+ vpx_free(motion_field->mf);
+ motion_field->mf = NULL;
+ vpx_free(motion_field->set_mv);
+ motion_field->set_mv = NULL;
+ status = STATUS_FAILED;
+ }
+ return status;
+}
+
+void vp9_free_motion_field(MotionField *motion_field) {
+ vpx_free(motion_field->mf);
+ vpx_free(motion_field->set_mv);
+ vpx_free(motion_field->local_structure);
+ vp9_zero(*motion_field);
+}
+
+void vp9_free_motion_field_info(MotionFieldInfo *motion_field_info) {
+ if (motion_field_info->allocated) {
+ int frame_idx, rf_idx, square_block_idx;
+ for (frame_idx = 0; frame_idx < motion_field_info->frame_num; ++frame_idx) {
+ for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
+ for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
+ ++square_block_idx) {
+ MotionField *motion_field =
+ &motion_field_info
+ ->motion_field_array[frame_idx][rf_idx][square_block_idx];
+ vp9_free_motion_field(motion_field);
+ }
+ }
+ }
+ vpx_free(motion_field_info->motion_field_array);
+ motion_field_info->motion_field_array = NULL;
+ motion_field_info->frame_num = 0;
+ motion_field_info->allocated = 0;
+ }
+}
+
+MotionField *vp9_motion_field_info_get_motion_field(
+ MotionFieldInfo *motion_field_info, int frame_idx, int rf_idx,
+ BLOCK_SIZE bsize) {
+ int square_block_idx = get_square_block_idx(bsize);
+ assert(frame_idx < motion_field_info->frame_num);
+ assert(motion_field_info->allocated == 1);
+ return &motion_field_info
+ ->motion_field_array[frame_idx][rf_idx][square_block_idx];
+}
+
+int vp9_motion_field_is_mv_set(const MotionField *motion_field, int brow,
+ int bcol) {
+ assert(brow >= 0 && brow < motion_field->block_rows);
+ assert(bcol >= 0 && bcol < motion_field->block_cols);
+ return motion_field->set_mv[brow * motion_field->block_cols + bcol];
+}
+
+int_mv vp9_motion_field_get_mv(const MotionField *motion_field, int brow,
+ int bcol) {
+ assert(brow >= 0 && brow < motion_field->block_rows);
+ assert(bcol >= 0 && bcol < motion_field->block_cols);
+ return motion_field->mf[brow * motion_field->block_cols + bcol];
+}
+
+int_mv vp9_motion_field_mi_get_mv(const MotionField *motion_field, int mi_row,
+ int mi_col) {
+ const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize];
+ const int brow = mi_row / mi_height;
+ const int bcol = mi_col / mi_width;
+ assert(mi_row % mi_height == 0);
+ assert(mi_col % mi_width == 0);
+ return vp9_motion_field_get_mv(motion_field, brow, bcol);
+}
+
+void vp9_motion_field_mi_set_mv(MotionField *motion_field, int mi_row,
+ int mi_col, int_mv mv) {
+ const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize];
+ const int brow = mi_row / mi_height;
+ const int bcol = mi_col / mi_width;
+ assert(mi_row % mi_height == 0);
+ assert(mi_col % mi_width == 0);
+ assert(brow >= 0 && brow < motion_field->block_rows);
+ assert(bcol >= 0 && bcol < motion_field->block_cols);
+ motion_field->mf[brow * motion_field->block_cols + bcol] = mv;
+ motion_field->set_mv[brow * motion_field->block_cols + bcol] = 1;
+}
+
+void vp9_motion_field_reset_mvs(MotionField *motion_field) {
+ memset(motion_field->set_mv, 0,
+ motion_field->block_num * sizeof(*motion_field->set_mv));
+}
+
+static int64_t log2_approximation(int64_t v) {
+ assert(v > 0);
+ if (v < LOG2_TABLE_SIZE) {
+ return log2_table[v];
+ } else {
+ // use linear approximation when v >= 2^10
+ const int slope =
+ 1477; // slope = 1 / (log(2) * 1024) * (1 << LOG2_PRECISION)
+ assert(LOG2_TABLE_SIZE == 1 << 10);
+
+ return slope * (v - LOG2_TABLE_SIZE) + (10 << LOG2_PRECISION);
+ }
+}
+
+int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs,
+ int mv_num) {
+ // The behavior of this function is to compute log2 of mv difference,
+ // i.e. min log2(1 + row_diff * row_diff + col_diff * col_diff)
+ // against available neighbor mvs.
+ // Since the log2 is monotonically increasing, we can compute
+ // min row_diff * row_diff + col_diff * col_diff first
+ // then apply log2 in the end.
+ int i;
+ int64_t min_abs_diff = INT64_MAX;
+ int cnt = 0;
+ assert(mv_num <= NB_MVS_NUM);
+ for (i = 0; i < mv_num; ++i) {
+ MV nb_mv = nb_full_mvs[i].as_mv;
+ const int64_t row_diff = abs(mv->row - nb_mv.row);
+ const int64_t col_diff = abs(mv->col - nb_mv.col);
+ const int64_t abs_diff = row_diff * row_diff + col_diff * col_diff;
+ assert(nb_full_mvs[i].as_int != INVALID_MV);
+ min_abs_diff = VPXMIN(abs_diff, min_abs_diff);
+ ++cnt;
+ }
+ if (cnt) {
+ return log2_approximation(1 + min_abs_diff);
+ }
+ return 0;
+}
+
// Computes one smoothed motion vector for block (row, col): a closed-form
// blend between the filter-weighted average of the eight neighboring MVs in
// |tmp_mf| (weighted by alpha) and the original search MV, weighted by the
// block's 2x2 local-structure matrix M. All MVs here are normalized by the
// block dimensions (see caller).
static FloatMV get_smooth_motion_vector(const FloatMV scaled_search_mv,
                                        const FloatMV *tmp_mf,
                                        const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
                                        int rows, int cols, int row, int col,
                                        float alpha) {
  const FloatMV tmp_mv = tmp_mf[row * cols + col];
  int idx_row, idx_col;
  FloatMV avg_nb_mv = { 0.0f, 0.0f };
  FloatMV mv = { 0.0f, 0.0f };
  // 3x3 neighbor filter: weights sum to 1, center weight is 0.
  float filter[3][3] = { { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f },
                         { 1.0f / 6.0f, 0.0f, 1.0f / 6.0f },
                         { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f } };
  for (idx_row = 0; idx_row < 3; ++idx_row) {
    int nb_row = row + idx_row - 1;
    for (idx_col = 0; idx_col < 3; ++idx_col) {
      int nb_col = col + idx_col - 1;
      if (nb_row < 0 || nb_col < 0 || nb_row >= rows || nb_col >= cols) {
        // Out-of-range neighbors are replaced by the center MV itself.
        avg_nb_mv.row += (tmp_mv.row) * filter[idx_row][idx_col];
        avg_nb_mv.col += (tmp_mv.col) * filter[idx_row][idx_col];
      } else {
        const FloatMV nb_mv = tmp_mf[nb_row * cols + nb_col];
        avg_nb_mv.row += (nb_mv.row) * filter[idx_row][idx_col];
        avg_nb_mv.col += (nb_mv.col) * filter[idx_row][idx_col];
      }
    }
  }
  {
    // M is the local variance of reference frame.
    // Element-wise solve of the 2x2 system:
    //   mv = (M + alpha*I)^-1 * (alpha * avg_nb_mv + M * scaled_search_mv)
    float M00 = M[row * cols + col][0];
    float M01 = M[row * cols + col][1];
    float M10 = M[row * cols + col][2];
    float M11 = M[row * cols + col][3];

    float det = (M00 + alpha) * (M11 + alpha) - M01 * M10;

    // inv_M = (M + alpha*I)^-1
    float inv_M00 = (M11 + alpha) / det;
    float inv_M01 = -M01 / det;
    float inv_M10 = -M10 / det;
    float inv_M11 = (M00 + alpha) / det;

    // inv_MM = inv_M * M
    float inv_MM00 = inv_M00 * M00 + inv_M01 * M10;
    float inv_MM01 = inv_M00 * M01 + inv_M01 * M11;
    float inv_MM10 = inv_M10 * M00 + inv_M11 * M10;
    float inv_MM11 = inv_M10 * M01 + inv_M11 * M11;

    mv.row = inv_M00 * avg_nb_mv.row * alpha + inv_M01 * avg_nb_mv.col * alpha +
             inv_MM00 * scaled_search_mv.row + inv_MM01 * scaled_search_mv.col;
    mv.col = inv_M10 * avg_nb_mv.row * alpha + inv_M11 * avg_nb_mv.col * alpha +
             inv_MM10 * scaled_search_mv.row + inv_MM11 * scaled_search_mv.col;
  }
  return mv;
}
+
+void vp9_get_smooth_motion_field(const MV *search_mf,
+ const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
+ int rows, int cols, BLOCK_SIZE bsize,
+ float alpha, int num_iters, MV *smooth_mf) {
+ // M is the local variation of reference frame
+ // build two buffers
+ FloatMV *input = (FloatMV *)malloc(rows * cols * sizeof(FloatMV));
+ FloatMV *output = (FloatMV *)malloc(rows * cols * sizeof(FloatMV));
+ int idx;
+ int row, col;
+ int bw = 4 << b_width_log2_lookup[bsize];
+ int bh = 4 << b_height_log2_lookup[bsize];
+ // copy search results to input buffer
+ for (idx = 0; idx < rows * cols; ++idx) {
+ input[idx].row = (float)search_mf[idx].row / bh;
+ input[idx].col = (float)search_mf[idx].col / bw;
+ }
+ for (idx = 0; idx < num_iters; ++idx) {
+ FloatMV *tmp;
+ for (row = 0; row < rows; ++row) {
+ for (col = 0; col < cols; ++col) {
+ // note: the scaled_search_mf and smooth_mf are all scaled by macroblock
+ // size
+ const MV search_mv = search_mf[row * cols + col];
+ FloatMV scaled_search_mv = { (float)search_mv.row / bh,
+ (float)search_mv.col / bw };
+ output[row * cols + col] = get_smooth_motion_vector(
+ scaled_search_mv, input, M, rows, cols, row, col, alpha);
+ }
+ }
+ // swap buffers
+ tmp = input;
+ input = output;
+ output = tmp;
+ }
+ // copy smoothed results to output
+ for (idx = 0; idx < rows * cols; ++idx) {
+ smooth_mf[idx].row = (int)(input[idx].row * bh);
+ smooth_mf[idx].col = (int)(input[idx].col * bw);
+ }
+ free(input);
+ free(output);
+}
+
// For each block, estimates the 2x2 local-structure matrix
//   M = [I_row, I_col]^T * [I_row, I_col]
// where I_row / I_col are per-pixel SAD differences between the matched
// reference block and its vertical / horizontal neighbors -- a directional
// gradient of the SAD surface around the search result. |M| is written
// row-major, one entry per block of |search_mf|.
void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame,
                             const YV12_BUFFER_CONFIG *ref_frame,
                             const MV *search_mf,
                             const vp9_variance_fn_ptr_t *fn_ptr, int rows,
                             int cols, BLOCK_SIZE bsize,
                             int (*M)[MF_LOCAL_STRUCTURE_SIZE]) {
  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  const int cur_stride = cur_frame->y_stride;
  const int ref_stride = ref_frame->y_stride;
  const int width = ref_frame->y_width;
  const int height = ref_frame->y_height;
  int row, col;
  for (row = 0; row < rows; ++row) {
    for (col = 0; col < cols; ++col) {
      int cur_offset = row * bh * cur_stride + col * bw;
      uint8_t *center = cur_frame->y_buffer + cur_offset;
      int ref_h = row * bh + search_mf[row * cols + col].row;
      int ref_w = col * bw + search_mf[row * cols + col].col;
      int ref_offset;
      uint8_t *target;
      uint8_t *nb;
      int search_dist;
      int nb_dist;
      int I_row = 0, I_col = 0;
      // TODO(Dan): handle the case when the reference frame block is beyond
      // the boundary, instead of clamping as below.
      ref_h = ref_h < 0 ? 0 : (ref_h >= height - bh ? height - bh - 1 : ref_h);
      ref_w = ref_w < 0 ? 0 : (ref_w >= width - bw ? width - bw - 1 : ref_w);
      // Compute the distortion of the search result itself.
      // TODO(Dan): maybe need to use a vp9 function to find the reference
      // block; to compare with the results of my python code, I first use my
      // way to compute the reference block.
      ref_offset = ref_h * ref_stride + ref_w;
      target = ref_frame->y_buffer + ref_offset;
      search_dist = fn_ptr->sdf(center, cur_stride, target, ref_stride);
      // Compute the distortions of the target's neighbors.
      // TODO(Dan): if using padding, the boundary condition may vary.
      // up
      if (ref_h - bh >= 0) {
        nb = target - ref_stride * bh;
        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
        I_row += nb_dist - search_dist;
      }
      // down
      if (ref_h + bh < height - bh) {
        nb = target + ref_stride * bh;
        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
        I_row += nb_dist - search_dist;
      }
      // Average when both vertical neighbors contributed.
      if (ref_h - bh >= 0 && ref_h + bh < height - bh) {
        I_row /= 2;
      }
      // Normalize to a per-pixel quantity.
      I_row /= (bw * bh);
      // left
      if (ref_w - bw >= 0) {
        nb = target - bw;
        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
        I_col += nb_dist - search_dist;
      }
      // right (original comment said "down", but this is the horizontal
      // neighbor at +bw)
      if (ref_w + bw < width - bw) {
        nb = target + bw;
        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
        I_col += nb_dist - search_dist;
      }
      // Average when both horizontal neighbors contributed.
      if (ref_w - bw >= 0 && ref_w + bw < width - bw) {
        I_col /= 2;
      }
      I_col /= (bw * bh);
      // Outer product [I_row, I_col]^T * [I_row, I_col], row-major.
      M[row * cols + col][0] = I_row * I_row;
      M[row * cols + col][1] = I_row * I_col;
      M[row * cols + col][2] = I_col * I_row;
      M[row * cols + col][3] = I_col * I_col;
    }
  }
}
diff --git a/libvpx/vp9/encoder/vp9_non_greedy_mv.h b/libvpx/vp9/encoder/vp9_non_greedy_mv.h
new file mode 100644
index 000000000..c2bd69722
--- /dev/null
+++ b/libvpx/vp9/encoder/vp9_non_greedy_mv.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_
+#define VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_
+
+#include "vp9/common/vp9_enums.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vpx_scale/yv12config.h"
+#include "vpx_dsp/variance.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#define NB_MVS_NUM 4
+#define LOG2_PRECISION 20
+#define MF_LOCAL_STRUCTURE_SIZE 4
+#define SQUARE_BLOCK_SIZES 4
+
+typedef enum Status { STATUS_OK = 0, STATUS_FAILED = 1 } Status;
+
// Per (frame, reference, block-size) grid of motion vectors produced by the
// non-greedy MV search.
typedef struct MotionField {
  int ready;  // cleared by vp9_alloc_motion_field(); presumably set once the
              // field has been estimated -- confirm at call sites
  BLOCK_SIZE bsize;  // square block size each grid cell covers
  int block_rows;
  int block_cols;
  int block_num;  // block_num == block_rows * block_cols
  // Per-block 2x2 local-structure matrix, row-major (see
  // vp9_get_local_structure()).
  int (*local_structure)[MF_LOCAL_STRUCTURE_SIZE];
  int_mv *mf;   // per-block motion vectors, row-major
  int *set_mv;  // per-block flag: non-zero once the mf entry has been set
  int mv_log_scale;  // NOTE(review): not referenced in this file -- verify
                     // intended use before documenting further
} MotionField;
+
+typedef struct MotionFieldInfo {
+ int frame_num;
+ int allocated;
+ MotionField (*motion_field_array)[MAX_INTER_REF_FRAMES][SQUARE_BLOCK_SIZES];
+} MotionFieldInfo;
+
+typedef struct {
+ float row, col;
+} FloatMV;
+
+static INLINE int get_square_block_idx(BLOCK_SIZE bsize) {
+ if (bsize == BLOCK_4X4) {
+ return 0;
+ }
+ if (bsize == BLOCK_8X8) {
+ return 1;
+ }
+ if (bsize == BLOCK_16X16) {
+ return 2;
+ }
+ if (bsize == BLOCK_32X32) {
+ return 3;
+ }
+ assert(0 && "ERROR: non-square block size");
+ return -1;
+}
+
+static INLINE BLOCK_SIZE square_block_idx_to_bsize(int square_block_idx) {
+ if (square_block_idx == 0) {
+ return BLOCK_4X4;
+ }
+ if (square_block_idx == 1) {
+ return BLOCK_8X8;
+ }
+ if (square_block_idx == 2) {
+ return BLOCK_16X16;
+ }
+ if (square_block_idx == 3) {
+ return BLOCK_32X32;
+ }
+ assert(0 && "ERROR: invalid square_block_idx");
+ return BLOCK_INVALID;
+}
+
+Status vp9_alloc_motion_field_info(MotionFieldInfo *motion_field_info,
+ int frame_num, int mi_rows, int mi_cols);
+
+Status vp9_alloc_motion_field(MotionField *motion_field, BLOCK_SIZE bsize,
+ int block_rows, int block_cols);
+
+void vp9_free_motion_field(MotionField *motion_field);
+
+void vp9_free_motion_field_info(MotionFieldInfo *motion_field_info);
+
+int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs,
+ int mv_num);
+
+void vp9_get_smooth_motion_field(const MV *search_mf,
+ const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
+ int rows, int cols, BLOCK_SIZE bize,
+ float alpha, int num_iters, MV *smooth_mf);
+
+void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame,
+ const YV12_BUFFER_CONFIG *ref_frame,
+ const MV *search_mf,
+ const vp9_variance_fn_ptr_t *fn_ptr, int rows,
+ int cols, BLOCK_SIZE bsize,
+ int (*M)[MF_LOCAL_STRUCTURE_SIZE]);
+
+MotionField *vp9_motion_field_info_get_motion_field(
+ MotionFieldInfo *motion_field_info, int frame_idx, int rf_idx,
+ BLOCK_SIZE bsize);
+
+void vp9_motion_field_mi_set_mv(MotionField *motion_field, int mi_row,
+ int mi_col, int_mv mv);
+
+void vp9_motion_field_reset_mvs(MotionField *motion_field);
+
+int_mv vp9_motion_field_get_mv(const MotionField *motion_field, int brow,
+ int bcol);
+int_mv vp9_motion_field_mi_get_mv(const MotionField *motion_field, int mi_row,
+ int mi_col);
+int vp9_motion_field_is_mv_set(const MotionField *motion_field, int brow,
+ int bcol);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif // VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_
diff --git a/libvpx/vp9/encoder/vp9_pickmode.c b/libvpx/vp9/encoder/vp9_pickmode.c
index 513b9f678..9b2e48505 100644
--- a/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/libvpx/vp9/encoder/vp9_pickmode.c
@@ -1501,7 +1501,8 @@ static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
int best_early_term = 0;
int best_flag_preduv_computed[2] = { 0 };
INTERP_FILTER filter_start = force_smooth_filter ? EIGHTTAP_SMOOTH : EIGHTTAP;
- for (filter = filter_start; filter <= EIGHTTAP_SMOOTH; ++filter) {
+ INTERP_FILTER filter_end = EIGHTTAP_SMOOTH;
+ for (filter = filter_start; filter <= filter_end; ++filter) {
int64_t cost;
mi->interp_filter = filter;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
@@ -1531,9 +1532,11 @@ static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
free_pred_buffer(*this_mode_pred);
*this_mode_pred = current_pred;
}
- current_pred = &tmp[get_pred_buffer(tmp, 3)];
- pd->dst.buf = current_pred->data;
- pd->dst.stride = bw;
+ if (filter != filter_end) {
+ current_pred = &tmp[get_pred_buffer(tmp, 3)];
+ pd->dst.buf = current_pred->data;
+ pd->dst.stride = bw;
+ }
}
}
}
@@ -1554,6 +1557,9 @@ static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
if (reuse_inter_pred) {
pd->dst.buf = (*this_mode_pred)->data;
pd->dst.stride = (*this_mode_pred)->stride;
+ } else if (best_filter < filter_end) {
+ mi->interp_filter = best_filter;
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
}
}
@@ -1713,9 +1719,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// process.
// tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
PRED_BUFFER tmp[4];
- DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]);
+ DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64] VPX_UNINITIALIZED);
#if CONFIG_VP9_HIGHBITDEPTH
- DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]);
+ DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64] VPX_UNINITIALIZED);
#endif
struct buf_2d orig_dst = pd->dst;
PRED_BUFFER *this_mode_pred = NULL;
@@ -2554,6 +2560,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize]))
continue;
+ if (cpi->sf.rt_intra_dc_only_low_content && this_mode != DC_PRED &&
+ x->content_state_sb != kVeryHighSad)
+ continue;
+
if ((cpi->sf.adaptive_rd_thresh_row_mt &&
rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh,
&rd_thresh_freq_fact[mode_index])) ||
diff --git a/libvpx/vp9/encoder/vp9_ratectrl.c b/libvpx/vp9/encoder/vp9_ratectrl.c
index 6745b0adf..cbafbf7b9 100644
--- a/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -436,7 +436,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->use_post_encode_drop = 0;
rc->ext_use_post_encode_drop = 0;
rc->arf_active_best_quality_adjustment_factor = 1.0;
-
+ rc->arf_increase_active_best_quality = 0;
rc->preserve_arf_as_gld = 0;
rc->preserve_next_arf_as_gld = 0;
rc->show_arf_as_gld = 0;
@@ -504,7 +504,7 @@ static int check_buffer_below_thresh(VP9_COMP *cpi, int drop_mark) {
}
}
-static int drop_frame(VP9_COMP *cpi) {
+int vp9_test_drop(VP9_COMP *cpi) {
const VP9EncoderConfig *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
SVC *svc = &cpi->svc;
@@ -609,13 +609,15 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
SVC *svc = &cpi->svc;
int svc_prev_layer_dropped = 0;
// In the constrained or full_superframe framedrop mode for svc
- // (framedrop_mode != LAYER_DROP), if the previous spatial layer was
- // dropped, drop the current spatial layer.
+ // (framedrop_mode != (LAYER_DROP && CONSTRAINED_FROM_ABOVE)),
+ // if the previous spatial layer was dropped, drop the current spatial layer.
if (cpi->use_svc && svc->spatial_layer_id > 0 &&
svc->drop_spatial_layer[svc->spatial_layer_id - 1])
svc_prev_layer_dropped = 1;
- if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP) ||
- drop_frame(cpi)) {
+ if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP &&
+ svc->framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP) ||
+ svc->force_drop_constrained_from_above[svc->spatial_layer_id] ||
+ vp9_test_drop(cpi)) {
vp9_rc_postencode_update_drop_frame(cpi);
cpi->ext_refresh_frame_flags_pending = 0;
cpi->last_frame_dropped = 1;
@@ -625,14 +627,17 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
svc->drop_count[svc->spatial_layer_id]++;
svc->skip_enhancement_layer = 1;
if (svc->framedrop_mode == LAYER_DROP ||
+ (svc->framedrop_mode == CONSTRAINED_FROM_ABOVE_DROP &&
+ svc->force_drop_constrained_from_above[svc->number_spatial_layers -
+ 1] == 0) ||
svc->drop_spatial_layer[0] == 0) {
- // For the case of constrained drop mode where the base is dropped
- // (drop_spatial_layer[0] == 1), which means full superframe dropped,
- // we don't increment the svc frame counters. In particular temporal
- // layer counter (which is incremented in vp9_inc_frame_in_layer())
- // won't be incremented, so on a dropped frame we try the same
- // temporal_layer_id on next incoming frame. This is to avoid an
- // issue with temporal alignement with full superframe dropping.
+ // For the case of constrained drop mode where full superframe is
+ // dropped, we don't increment the svc frame counters.
+ // In particular temporal layer counter (which is incremented in
+ // vp9_inc_frame_in_layer()) won't be incremented, so on a dropped
+ // frame we try the same temporal_layer_id on next incoming frame.
+ // This is to avoid an issue with temporal alignment with full
+ // superframe dropping.
vp9_inc_frame_in_layer(cpi);
}
if (svc->spatial_layer_id == svc->number_spatial_layers - 1) {
@@ -1420,8 +1425,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
int active_worst_quality = cpi->twopass.active_worst_quality;
int q;
int *inter_minq;
- int arf_active_best_quality_adjustment, arf_active_best_quality_max;
- int *arfgf_high_motion_minq;
+ int arf_active_best_quality_hl;
+ int *arfgf_high_motion_minq, *arfgf_low_motion_minq;
const int boost_frame =
!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
@@ -1448,14 +1453,20 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
if (q < cq_level) q = cq_level;
}
active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
+ arf_active_best_quality_hl = active_best_quality;
- ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_high_motion_minq);
- arf_active_best_quality_max = arfgf_high_motion_minq[q];
- arf_active_best_quality_adjustment =
- arf_active_best_quality_max - active_best_quality;
- active_best_quality = arf_active_best_quality_max -
- (int)(arf_active_best_quality_adjustment *
- rc->arf_active_best_quality_adjustment_factor);
+ if (rc->arf_increase_active_best_quality == 1) {
+ ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_high_motion_minq);
+ arf_active_best_quality_hl = arfgf_high_motion_minq[q];
+ } else if (rc->arf_increase_active_best_quality == -1) {
+ ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_low_motion_minq);
+ arf_active_best_quality_hl = arfgf_low_motion_minq[q];
+ }
+ active_best_quality =
+ (int)((double)active_best_quality *
+ rc->arf_active_best_quality_adjustment_factor +
+ (double)arf_active_best_quality_hl *
+ (1.0 - rc->arf_active_best_quality_adjustment_factor));
// Modify best quality for second level arfs. For mode VPX_Q this
// becomes the baseline frame q.
@@ -1480,17 +1491,30 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
// Extension to max or min Q if undershoot or overshoot is outside
// the permitted range.
if (frame_is_intra_only(cm) || boost_frame) {
+ const int layer_depth = gf_group->layer_depth[gf_group_index];
active_best_quality -=
(cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
active_worst_quality += (cpi->twopass.extend_maxq / 2);
+
+ if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) {
+ assert(layer_depth > 1);
+ active_best_quality =
+ VPXMAX(active_best_quality,
+ cpi->twopass.last_qindex_of_arf_layer[layer_depth - 1]);
+ }
} else {
+ const int max_layer_depth = gf_group->max_layer_depth;
+ assert(max_layer_depth > 0);
+
active_best_quality -=
(cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2;
active_worst_quality += cpi->twopass.extend_maxq;
// For normal frames do not allow an active minq lower than the q used for
// the last boosted frame.
- active_best_quality = VPXMAX(active_best_quality, rc->last_boosted_qindex);
+ active_best_quality =
+ VPXMAX(active_best_quality,
+ cpi->twopass.last_qindex_of_arf_layer[max_layer_depth - 1]);
}
#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
@@ -1789,6 +1813,9 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
RATE_CONTROL *const rc = &cpi->rc;
SVC *const svc = &cpi->svc;
const int qindex = cm->base_qindex;
+ const GF_GROUP *gf_group = &cpi->twopass.gf_group;
+ const int gf_group_index = cpi->twopass.gf_group.index;
+ const int layer_depth = gf_group->layer_depth[gf_group_index];
// Update rate control heuristics
rc->projected_frame_size = (int)(bytes_used << 3);
@@ -1843,6 +1870,15 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
(cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
rc->last_boosted_qindex = qindex;
}
+
+ if ((qindex < cpi->twopass.last_qindex_of_arf_layer[layer_depth]) ||
+ (cm->frame_type == KEY_FRAME) ||
+ (!rc->constrained_gf_group &&
+ (cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
+ cpi->twopass.last_qindex_of_arf_layer[layer_depth] = qindex;
+ }
+
if (frame_is_intra_only(cm)) rc->last_kf_qindex = qindex;
update_buffer_level_postencode(cpi, rc->projected_frame_size);
@@ -2441,12 +2477,23 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
// Set Maximum gf/arf interval
rc->max_gf_interval = oxcf->max_gf_interval;
rc->min_gf_interval = oxcf->min_gf_interval;
+#if CONFIG_RATE_CTRL
+ if (rc->min_gf_interval == 0) {
+ rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
+ oxcf->width, oxcf->height, oxcf->init_framerate);
+ }
+ if (rc->max_gf_interval == 0) {
+ rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
+ oxcf->init_framerate, rc->min_gf_interval);
+ }
+#else
if (rc->min_gf_interval == 0)
rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
oxcf->width, oxcf->height, cpi->framerate);
if (rc->max_gf_interval == 0)
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
cpi->framerate, rc->min_gf_interval);
+#endif
// Extended max interval for genuinely static scenes like slide shows.
rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;
diff --git a/libvpx/vp9/encoder/vp9_ratectrl.h b/libvpx/vp9/encoder/vp9_ratectrl.h
index 09d69e4d4..7dbe17dc5 100644
--- a/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -198,7 +198,7 @@ typedef struct {
int damped_adjustment[RATE_FACTOR_LEVELS];
double arf_active_best_quality_adjustment_factor;
- int arf_active_best_quality_adjustment_window;
+ int arf_increase_active_best_quality;
int preserve_arf_as_gld;
int preserve_next_arf_as_gld;
@@ -267,6 +267,8 @@ void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi);
// Post encode drop for CBR mode.
int post_encode_drop_cbr(struct VP9_COMP *cpi, size_t *size);
+int vp9_test_drop(struct VP9_COMP *cpi);
+
// Decide if we should drop this frame: For 1-pass CBR.
// Changes only the decimation count in the rate control structure
int vp9_rc_drop_frame(struct VP9_COMP *cpi);
diff --git a/libvpx/vp9/encoder/vp9_rd.h b/libvpx/vp9/encoder/vp9_rd.h
index df6ea9094..908989c07 100644
--- a/libvpx/vp9/encoder/vp9_rd.h
+++ b/libvpx/vp9/encoder/vp9_rd.h
@@ -38,8 +38,6 @@ extern "C" {
#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120
-#define INVALID_MV 0x80008000
-
#define MAX_MODES 30
#define MAX_REFS 6
diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c
index d07d91774..fa7472ca6 100644
--- a/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/libvpx/vp9/encoder/vp9_rdopt.c
@@ -2494,19 +2494,19 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
MV pred_mv[3];
+ int bestsme = INT_MAX;
#if CONFIG_NON_GREEDY_MV
- double bestsme;
- int_mv nb_full_mvs[NB_MVS_NUM];
- const int nb_full_mv_num = NB_MVS_NUM;
int gf_group_idx = cpi->twopass.gf_group.index;
int gf_rf_idx = ref_frame_to_gf_rf_idx(ref);
BLOCK_SIZE square_bsize = get_square_block_size(bsize);
+ int_mv nb_full_mvs[NB_MVS_NUM] = { 0 };
+ MotionField *motion_field = vp9_motion_field_info_get_motion_field(
+ &cpi->motion_field_info, gf_group_idx, gf_rf_idx, square_bsize);
+ const int nb_full_mv_num =
+ vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
const int lambda = (pw * ph) / 4;
assert(pw * ph == lambda << 2);
- vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col,
- gf_rf_idx, square_bsize, nb_full_mvs);
#else // CONFIG_NON_GREEDY_MV
- int bestsme = INT_MAX;
int sadpb = x->sadperbit16;
#endif // CONFIG_NON_GREEDY_MV
@@ -2580,9 +2580,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
mvp_full.row >>= 3;
#if CONFIG_NON_GREEDY_MV
- bestsme = vp9_full_pixel_diamond_new(cpi, x, &mvp_full, step_param, lambda, 1,
- &cpi->fn_ptr[bsize], nb_full_mvs,
- nb_full_mv_num, &tmp_mv->as_mv);
+ bestsme = vp9_full_pixel_diamond_new(cpi, x, bsize, &mvp_full, step_param,
+ lambda, 1, nb_full_mvs, nb_full_mv_num,
+ &tmp_mv->as_mv);
#else // CONFIG_NON_GREEDY_MV
bestsme = vp9_full_pixel_search(
cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb,
@@ -2592,11 +2592,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
if (cpi->sf.enhanced_full_pixel_motion_search) {
int i;
for (i = 0; i < 3; ++i) {
-#if CONFIG_NON_GREEDY_MV
- double this_me;
-#else // CONFIG_NON_GREEDY_MV
int this_me;
-#endif // CONFIG_NON_GREEDY_MV
MV this_mv;
int diff_row;
int diff_col;
@@ -2622,9 +2618,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
mvp_full.row >>= 3;
#if CONFIG_NON_GREEDY_MV
this_me = vp9_full_pixel_diamond_new(
- cpi, x, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step),
- lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num,
- &this_mv);
+ cpi, x, bsize, &mvp_full,
+ VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), lambda, 1, nb_full_mvs,
+ nb_full_mv_num, &this_mv);
#else // CONFIG_NON_GREEDY_MV
this_me = vp9_full_pixel_search(
cpi, x, bsize, &mvp_full,
@@ -2678,8 +2674,7 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd,
// However, once established that vector may be usable through the nearest and
// near mv modes to reduce distortion in subsequent blocks and also improve
// visual quality.
-static int discount_newmv_test(const VP9_COMP *cpi, int this_mode,
- int_mv this_mv,
+static int discount_newmv_test(VP9_COMP *cpi, int this_mode, int_mv this_mv,
int_mv (*mode_mv)[MAX_REF_FRAMES], int ref_frame,
int mi_row, int mi_col, BLOCK_SIZE bsize) {
#if CONFIG_NON_GREEDY_MV
@@ -2689,6 +2684,8 @@ static int discount_newmv_test(const VP9_COMP *cpi, int this_mode,
const int gf_group_idx = cpi->twopass.gf_group.index;
const int gf_rf_idx = ref_frame_to_gf_rf_idx(ref_frame);
const TplDepFrame tpl_frame = cpi->tpl_stats[gf_group_idx];
+ const MotionField *motion_field = vp9_motion_field_info_get_motion_field(
+ &cpi->motion_field_info, gf_group_idx, gf_rf_idx, cpi->tpl_bsize);
const int tpl_block_mi_h = num_8x8_blocks_high_lookup[cpi->tpl_bsize];
const int tpl_block_mi_w = num_8x8_blocks_wide_lookup[cpi->tpl_bsize];
const int tpl_mi_row = mi_row - (mi_row % tpl_block_mi_h);
@@ -2697,8 +2694,8 @@ static int discount_newmv_test(const VP9_COMP *cpi, int this_mode,
tpl_frame
.mv_mode_arr[gf_rf_idx][tpl_mi_row * tpl_frame.stride + tpl_mi_col];
if (mv_mode == NEW_MV_MODE) {
- int_mv tpl_new_mv = *get_pyramid_mv(&tpl_frame, gf_rf_idx, cpi->tpl_bsize,
- tpl_mi_row, tpl_mi_col);
+ int_mv tpl_new_mv =
+ vp9_motion_field_mi_get_mv(motion_field, tpl_mi_row, tpl_mi_col);
int row_diff = abs(tpl_new_mv.as_mv.row - this_mv.as_mv.row);
int col_diff = abs(tpl_new_mv.as_mv.col - this_mv.as_mv.col);
if (VPXMAX(row_diff, col_diff) <= 8) {
@@ -3455,7 +3452,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
if (cpi->rc.is_src_frame_alt_ref) {
if (sf->alt_ref_search_fp) {
mode_skip_mask[ALTREF_FRAME] = 0;
- ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
+ ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME) & 0xff;
ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
}
}
diff --git a/libvpx/vp9/encoder/vp9_speed_features.c b/libvpx/vp9/encoder/vp9_speed_features.c
index 529dca040..0b24b5cb3 100644
--- a/libvpx/vp9/encoder/vp9_speed_features.c
+++ b/libvpx/vp9/encoder/vp9_speed_features.c
@@ -456,6 +456,7 @@ static void set_rt_speed_feature_framesize_independent(
sf->variance_part_thresh_mult = 1;
sf->cb_pred_filter_search = 0;
sf->force_smooth_interpol = 0;
+ sf->rt_intra_dc_only_low_content = 0;
if (speed >= 1) {
sf->allow_txfm_domain_distortion = 1;
@@ -535,13 +536,6 @@ static void set_rt_speed_feature_framesize_independent(
int i;
if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0)
sf->use_altref_onepass = 1;
- sf->last_partitioning_redo_frequency = 4;
- sf->adaptive_rd_thresh = 5;
- sf->use_fast_coef_costing = 0;
- sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
- sf->adjust_partitioning_from_last_frame =
- cm->last_frame_type != cm->frame_type ||
- (0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency);
sf->mv.subpel_force_stop = QUARTER_PEL;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
@@ -550,13 +544,19 @@ static void set_rt_speed_feature_framesize_independent(
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->frame_parameter_update = 0;
sf->mv.search_method = FAST_HEX;
-
- sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
- sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
- sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
- sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST;
+ sf->allow_skip_recode = 0;
sf->max_intra_bsize = BLOCK_32X32;
- sf->allow_skip_recode = 1;
+ sf->use_fast_coef_costing = 0;
+ sf->use_quant_fp = !is_keyframe;
+ sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
+ sf->adaptive_rd_thresh = 2;
+ sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED;
+ sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH;
+ sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
+ sf->partition_search_type = VAR_BASED_PARTITION;
}
if (speed >= 5) {
@@ -740,12 +740,7 @@ static void set_rt_speed_feature_framesize_independent(
sf->nonrd_use_ml_partition = 0;
#endif
if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = HALF_PEL;
- // Only keep INTRA_DC mode for speed 8.
- if (!is_keyframe) {
- int i = 0;
- for (i = 0; i < BLOCK_SIZES; ++i)
- sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
- }
+ sf->rt_intra_dc_only_low_content = 1;
if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
content != VP9E_CONTENT_SCREEN) {
// More aggressive short circuit for speed 8.
@@ -771,6 +766,12 @@ static void set_rt_speed_feature_framesize_independent(
}
if (speed >= 9) {
+ // Only keep INTRA_DC mode for speed 9.
+ if (!is_keyframe) {
+ int i = 0;
+ for (i = 0; i < BLOCK_SIZES; ++i)
+ sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
+ }
sf->cb_pred_filter_search = 1;
sf->mv.enable_adaptive_subpel_force_stop = 1;
sf->mv.adapt_subpel_force_stop.mv_thresh = 1;
@@ -817,7 +818,7 @@ static void set_rt_speed_feature_framesize_independent(
}
// TODO(marpan): There is regression for aq-mode=3 speed <= 4, force it
// off for now.
- if (speed <= 4 && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ if (speed <= 3 && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
cpi->oxcf.aq_mode = 0;
}
diff --git a/libvpx/vp9/encoder/vp9_speed_features.h b/libvpx/vp9/encoder/vp9_speed_features.h
index eb0628199..ca284ded8 100644
--- a/libvpx/vp9/encoder/vp9_speed_features.h
+++ b/libvpx/vp9/encoder/vp9_speed_features.h
@@ -608,6 +608,10 @@ typedef struct SPEED_FEATURES {
// Force subpel motion filter to always use SMOOTH_FILTER.
int force_smooth_interpol;
+
+ // For real-time mode: force DC only under intra search when content
+ // does not have high source SAD.
+ int rt_intra_dc_only_low_content;
} SPEED_FEATURES;
struct VP9_COMP;
diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.c b/libvpx/vp9/encoder/vp9_svc_layercontext.c
index 8ba113bf3..32ee6e064 100644
--- a/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -57,8 +57,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->simulcast_mode = 0;
for (i = 0; i < REF_FRAMES; ++i) {
- svc->fb_idx_spatial_layer_id[i] = -1;
- svc->fb_idx_temporal_layer_id[i] = -1;
+ svc->fb_idx_spatial_layer_id[i] = 0xff;
+ svc->fb_idx_temporal_layer_id[i] = 0xff;
svc->fb_idx_base[i] = 0;
}
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
@@ -74,6 +74,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->fb_idx_upd_tl0[sl] = -1;
svc->drop_count[sl] = 0;
svc->spatial_layer_sync[sl] = 0;
+ svc->force_drop_constrained_from_above[sl] = 0;
}
svc->max_consec_drop = INT_MAX;
@@ -770,6 +771,32 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
svc->mi_rows[svc->spatial_layer_id] = cpi->common.mi_rows;
svc->mi_cols[svc->spatial_layer_id] = cpi->common.mi_cols;
+ // For constrained_from_above drop mode: before encoding superframe (i.e.,
+ // at SL0 frame) check all spatial layers (starting from top) for possible
+ // drop, and if so, set a flag to force drop of that layer and all its lower
+ // layers.
+ if (svc->spatial_layer_to_encode == svc->first_spatial_layer_to_encode) {
+ int sl;
+ for (sl = 0; sl < svc->number_spatial_layers; sl++)
+ svc->force_drop_constrained_from_above[sl] = 0;
+ if (svc->framedrop_mode == CONSTRAINED_FROM_ABOVE_DROP) {
+ for (sl = svc->number_spatial_layers - 1;
+ sl >= svc->first_spatial_layer_to_encode; sl--) {
+ int layer = sl * svc->number_temporal_layers + svc->temporal_layer_id;
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ cpi->rc = lc->rc;
+ cpi->oxcf.target_bandwidth = lc->target_bandwidth;
+ if (vp9_test_drop(cpi)) {
+ int sl2;
+ // Set flag to force drop in encoding for this mode.
+ for (sl2 = sl; sl2 >= svc->first_spatial_layer_to_encode; sl2--)
+ svc->force_drop_constrained_from_above[sl2] = 1;
+ break;
+ }
+ }
+ }
+ }
+
if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
set_flags_and_fb_idx_for_temporal_mode3(cpi);
} else if (svc->temporal_layering_mode ==
diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.h b/libvpx/vp9/encoder/vp9_svc_layercontext.h
index 77d438266..f1ba77970 100644
--- a/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -58,7 +58,6 @@ typedef struct {
int gold_ref_idx;
int has_alt_frame;
size_t layer_size;
- struct vpx_psnr_pkt psnr_pkt;
// Cyclic refresh parameters (aq-mode=3), that need to be updated per-frame.
// TODO(jianj/marpan): Is it better to use the full cyclic refresh struct.
int sb_index;
@@ -138,6 +137,7 @@ typedef struct SVC {
int drop_spatial_layer[VPX_MAX_LAYERS];
int framedrop_thresh[VPX_MAX_LAYERS];
int drop_count[VPX_MAX_LAYERS];
+ int force_drop_constrained_from_above[VPX_MAX_LAYERS];
int max_consec_drop;
SVC_LAYER_DROP_MODE framedrop_mode;
diff --git a/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
index aa46c5889..4be6a5ea0 100644
--- a/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
+++ b/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
@@ -114,7 +114,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
// Work out the start point for the search
const uint8_t *best_address = in_what;
const uint8_t *new_best_address = best_address;
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
__m128i v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
#else
__m128i v_ba_d = _mm_set1_epi32((intptr_t)best_address);
@@ -138,7 +138,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
for (i = 0, step = 0; step < tot_steps; step++) {
for (j = 0; j < cfg->searches_per_step; j += 4, i += 4) {
__m128i v_sad_d, v_cost_d, v_outside_d, v_inside_d, v_diff_mv_w;
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
__m128i v_blocka[2];
#else
__m128i v_blocka[1];
@@ -175,7 +175,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
// Compute the SIMD pointer offsets.
{
-#if ARCH_X86_64 // sizeof(intptr_t) == 8
+#if VPX_ARCH_X86_64 // sizeof(intptr_t) == 8
// Load the offsets
__m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]);
__m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]);
@@ -186,7 +186,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
// Compute the candidate addresses
v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q);
v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q);
-#else // ARCH_X86 // sizeof(intptr_t) == 4
+#else // VPX_ARCH_X86 // sizeof(intptr_t) == 4
__m128i v_bo_d = _mm_loadu_si128((const __m128i *)&ss_os[i]);
v_bo_d = _mm_and_si128(v_bo_d, v_inside_d);
v_blocka[0] = _mm_add_epi32(v_ba_d, v_bo_d);
@@ -294,7 +294,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
best_address = new_best_address;
v_bmv_w = _mm_set1_epi32(bmv.as_int);
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
#else
v_ba_d = _mm_set1_epi32((intptr_t)best_address);
diff --git a/libvpx/vp9/encoder/x86/vp9_error_sse2.asm b/libvpx/vp9/encoder/x86/vp9_error_sse2.asm
index 11d473b2d..7beec130a 100644
--- a/libvpx/vp9/encoder/x86/vp9_error_sse2.asm
+++ b/libvpx/vp9/encoder/x86/vp9_error_sse2.asm
@@ -58,7 +58,7 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
movhlps m7, m6
paddq m4, m5
paddq m6, m7
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
movq rax, m4
movq [sszq], m6
%else
@@ -105,7 +105,7 @@ cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
; accumulate horizontally and store in return value
movhlps m5, m4
paddq m4, m5
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
movq rax, m4
%else
pshufd m5, m4, 0x1
diff --git a/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
index 885220a71..e3d803b8f 100644
--- a/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -25,7 +25,7 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *iscan) {
__m128i zero;
__m128i thr;
- int16_t nzflag;
+ int nzflag;
__m128i eob;
__m128i round, quant, dequant;
diff --git a/libvpx/vp9/simple_encode.cc b/libvpx/vp9/simple_encode.cc
new file mode 100644
index 000000000..6a35eb6bc
--- /dev/null
+++ b/libvpx/vp9/simple_encode.cc
@@ -0,0 +1,313 @@
+#include <vector>
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/vp9_iface_common.h"
+#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/simple_encode.h"
+#include "vp9/vp9_cx_iface.h"
+
+namespace vp9 {
+
+// TODO(angiebird): Merge this function with vpx_img_plane_width()
+static int img_plane_width(const vpx_image_t *img, int plane) {
+ if (plane > 0 && img->x_chroma_shift > 0)
+ return (img->d_w + 1) >> img->x_chroma_shift;
+ else
+ return img->d_w;
+}
+
+// TODO(angiebird): Merge this function with vpx_img_plane_height()
+static int img_plane_height(const vpx_image_t *img, int plane) {
+ if (plane > 0 && img->y_chroma_shift > 0)
+ return (img->d_h + 1) >> img->y_chroma_shift;
+ else
+ return img->d_h;
+}
+
+// TODO(angiebird): Merge this function with vpx_img_read()
+static int img_read(vpx_image_t *img, FILE *file) {
+ int plane;
+
+ for (plane = 0; plane < 3; ++plane) {
+ unsigned char *buf = img->planes[plane];
+ const int stride = img->stride[plane];
+ const int w = img_plane_width(img, plane) *
+ ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
+ const int h = img_plane_height(img, plane);
+ int y;
+
+ for (y = 0; y < h; ++y) {
+ if (fread(buf, 1, w, file) != (size_t)w) return 0;
+ buf += stride;
+ }
+ }
+
+ return 1;
+}
+
+class SimpleEncode::EncodeImpl {
+ public:
+ VP9_COMP *cpi;
+ vpx_img_fmt_t img_fmt;
+ vpx_image_t tmp_img;
+ std::vector<FIRSTPASS_STATS> first_pass_stats;
+};
+
+static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf,
+ vpx_img_fmt_t img_fmt) {
+ VP9_COMP *cpi;
+ BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool));
+ vp9_initialize_enc();
+ cpi = vp9_create_compressor(oxcf, buffer_pool);
+ vp9_update_compressor_with_img_fmt(cpi, img_fmt);
+ return cpi;
+}
+
+static void free_encoder(VP9_COMP *cpi) {
+ BufferPool *buffer_pool = cpi->common.buffer_pool;
+ vp9_remove_compressor(cpi);
+ // buffer_pool needs to be freed after cpi because buffer_pool contains
+ // allocated buffers that will be freed in vp9_remove_compressor()
+ vpx_free(buffer_pool);
+}
+
+static INLINE vpx_rational_t make_vpx_rational(int num, int den) {
+ vpx_rational_t v;
+ v.num = num;
+ v.den = den;
+ return v;
+}
+
+static INLINE FrameType
+get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) {
+ // TODO(angiebird): Figure out if we need frame type other than key frame,
+ // alternate reference and inter frame
+ switch (update_type) {
+ case KF_UPDATE: return kKeyFrame; break;
+ case ARF_UPDATE: return kAlternateReference; break;
+ default: return kInterFrame; break;
+ }
+}
+
+static void update_encode_frame_result(
+ EncodeFrameResult *encode_frame_result,
+ const ENCODE_FRAME_RESULT *encode_frame_info) {
+ encode_frame_result->coding_data_bit_size =
+ encode_frame_result->coding_data_byte_size * 8;
+ encode_frame_result->show_idx = encode_frame_info->show_idx;
+ encode_frame_result->frame_type =
+ get_frame_type_from_update_type(encode_frame_info->update_type);
+ encode_frame_result->psnr = encode_frame_info->psnr;
+ encode_frame_result->sse = encode_frame_info->sse;
+ encode_frame_result->quantize_index = encode_frame_info->quantize_index;
+}
+
+SimpleEncode::SimpleEncode(int frame_width, int frame_height,
+ int frame_rate_num, int frame_rate_den,
+ int target_bitrate, int num_frames,
+ const char *infile_path) {
+ impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl());
+ frame_width_ = frame_width;
+ frame_height_ = frame_height;
+ frame_rate_num_ = frame_rate_num;
+ frame_rate_den_ = frame_rate_den;
+ target_bitrate_ = target_bitrate;
+ num_frames_ = num_frames;
+ // TODO(angiebird): Should we keep a file pointer here or keep the file_path?
+ file_ = fopen(infile_path, "r");
+ impl_ptr_->cpi = NULL;
+ impl_ptr_->img_fmt = VPX_IMG_FMT_I420;
+}
+
+void SimpleEncode::ComputeFirstPassStats() {
+ vpx_rational_t frame_rate =
+ make_vpx_rational(frame_rate_num_, frame_rate_den_);
+ const VP9EncoderConfig oxcf =
+ vp9_get_encoder_config(frame_width_, frame_height_, frame_rate,
+ target_bitrate_, VPX_RC_FIRST_PASS);
+ VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
+ struct lookahead_ctx *lookahead = cpi->lookahead;
+ int i;
+ int use_highbitdepth = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ use_highbitdepth = cpi->common.use_highbitdepth;
+#endif
+ vpx_image_t img;
+ vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1);
+ rewind(file_);
+ impl_ptr_->first_pass_stats.clear();
+ for (i = 0; i < num_frames_; ++i) {
+ assert(!vp9_lookahead_full(lookahead));
+ if (img_read(&img, file_)) {
+ int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
+ int64_t ts_start =
+ timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx);
+ int64_t ts_end =
+ timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1);
+ YV12_BUFFER_CONFIG sd;
+ image2yuvconfig(&img, &sd);
+ vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
+ {
+ int64_t time_stamp;
+ int64_t time_end;
+ int flush = 1; // Makes vp9_get_compressed_data process a frame
+ size_t size;
+ unsigned int frame_flags = 0;
+ ENCODE_FRAME_RESULT encode_frame_info;
+ // TODO(angiebird): Call vp9_first_pass directly
+ vp9_get_compressed_data(cpi, &frame_flags, &size, NULL, &time_stamp,
+ &time_end, flush, &encode_frame_info);
+ // vp9_get_compressed_data only generates first pass stats, it
+ // does not compress data
+ assert(size == 0);
+ }
+ impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass));
+ }
+ }
+ vp9_end_first_pass(cpi);
+ // TODO(angiebird): Store the total_stats apart from first_pass_stats
+ impl_ptr_->first_pass_stats.push_back(vp9_get_total_stats(&cpi->twopass));
+ free_encoder(cpi);
+ rewind(file_);
+ vpx_img_free(&img);
+}
+
+std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
+ std::vector<std::vector<double>> output_stats;
+ // TODO(angiebird): This function makes several assumptions of
+ // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the
+ // last one. 2) The last entry of first_pass_stats is the total_stats.
+ // Change the code structure, so that we don't have to make these assumptions
+
+ // Note the last entry of first_pass_stats is the total_stats, we don't need
+ // it.
+ for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) {
+ double *buf_start =
+ reinterpret_cast<double *>(&impl_ptr_->first_pass_stats[i]);
+ // We use - 1 here because the last member in FIRSTPASS_STATS is not double
+ double *buf_end =
+ buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) -
+ 1;
+ std::vector<double> this_stats(buf_start, buf_end);
+ output_stats.push_back(this_stats);
+ }
+ return output_stats;
+}
+
+void SimpleEncode::StartEncode() {
+ assert(impl_ptr_->first_pass_stats.size() > 0);
+ vpx_rational_t frame_rate =
+ make_vpx_rational(frame_rate_num_, frame_rate_den_);
+ VP9EncoderConfig oxcf =
+ vp9_get_encoder_config(frame_width_, frame_height_, frame_rate,
+ target_bitrate_, VPX_RC_LAST_PASS);
+ vpx_fixed_buf_t stats;
+ stats.buf = impl_ptr_->first_pass_stats.data();
+ stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) *
+ impl_ptr_->first_pass_stats.size();
+
+ vp9_set_first_pass_stats(&oxcf, &stats);
+ assert(impl_ptr_->cpi == NULL);
+ impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
+ vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_,
+ frame_height_, 1);
+ rewind(file_);
+}
+
+void SimpleEncode::EndEncode() {
+ free_encoder(impl_ptr_->cpi);
+ impl_ptr_->cpi = nullptr;
+ vpx_img_free(&impl_ptr_->tmp_img);
+ rewind(file_);
+}
+
+int SimpleEncode::GetKeyFrameGroupSize(int key_frame_index) const {
+ const VP9_COMP *cpi = impl_ptr_->cpi;
+ return vp9_get_frames_to_next_key(&cpi->oxcf, &cpi->frame_info,
+ &cpi->twopass.first_pass_info,
+ key_frame_index, cpi->rc.min_gf_interval);
+}
+
+void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
+ VP9_COMP *cpi = impl_ptr_->cpi;
+ struct lookahead_ctx *lookahead = cpi->lookahead;
+ int use_highbitdepth = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ use_highbitdepth = cpi->common.use_highbitdepth;
+#endif
+ // The lookahead's size is set to oxcf->lag_in_frames.
+ // We want to fill lookahead to its max capacity if possible so that the
+ // encoder can construct alt ref frame in time.
+ // In other words, we hope vp9_get_compressed_data encodes a frame
+ // every time in the function
+ while (!vp9_lookahead_full(lookahead)) {
+ // TODO(angiebird): Check whether we can move this file read logic to
+ // lookahead
+ if (img_read(&impl_ptr_->tmp_img, file_)) {
+ int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
+ int64_t ts_start =
+ timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx);
+ int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts,
+ next_show_idx + 1);
+ YV12_BUFFER_CONFIG sd;
+ image2yuvconfig(&impl_ptr_->tmp_img, &sd);
+ vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
+ } else {
+ break;
+ }
+ }
+ assert(encode_frame_result->coding_data.get() == nullptr);
+ const size_t max_coding_data_byte_size = frame_width_ * frame_height_ * 3;
+ encode_frame_result->coding_data = std::move(
+ std::unique_ptr<uint8_t[]>(new uint8_t[max_coding_data_byte_size]));
+ int64_t time_stamp;
+ int64_t time_end;
+ int flush = 1; // Make vp9_get_compressed_data encode a frame
+ unsigned int frame_flags = 0;
+ ENCODE_FRAME_RESULT encode_frame_info;
+ vp9_get_compressed_data(cpi, &frame_flags,
+ &encode_frame_result->coding_data_byte_size,
+ encode_frame_result->coding_data.get(), &time_stamp,
+ &time_end, flush, &encode_frame_info);
+ // vp9_get_compressed_data is expected to encode a frame every time, so the
+ // data size should be greater than zero.
+ assert(encode_frame_result->coding_data_byte_size > 0);
+ assert(encode_frame_result->coding_data_byte_size <
+ max_coding_data_byte_size);
+
+ update_encode_frame_result(encode_frame_result, &encode_frame_info);
+}
+
+void SimpleEncode::EncodeFrameWithQuantizeIndex(
+ EncodeFrameResult *encode_frame_result, int quantize_index) {
+ encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command,
+ quantize_index);
+ EncodeFrame(encode_frame_result);
+ encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command);
+}
+
+int SimpleEncode::GetCodingFrameNum() const {
+ assert(impl_ptr_->first_pass_stats.size() - 1 > 0);
+ // These are the default settings for now.
+ const int multi_layer_arf = 0;
+ const int allow_alt_ref = 1;
+ vpx_rational_t frame_rate =
+ make_vpx_rational(frame_rate_num_, frame_rate_den_);
+ const VP9EncoderConfig oxcf =
+ vp9_get_encoder_config(frame_width_, frame_height_, frame_rate,
+ target_bitrate_, VPX_RC_LAST_PASS);
+ FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
+ FIRST_PASS_INFO first_pass_info;
+ fps_init_first_pass_info(&first_pass_info, impl_ptr_->first_pass_stats.data(),
+ num_frames_);
+ return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info,
+ multi_layer_arf, allow_alt_ref);
+}
+
+SimpleEncode::~SimpleEncode() {
+ if (this->file_ != NULL) {
+ fclose(this->file_);
+ }
+}
+
+} // namespace vp9
diff --git a/libvpx/vp9/simple_encode.h b/libvpx/vp9/simple_encode.h
new file mode 100644
index 000000000..471b4e7a8
--- /dev/null
+++ b/libvpx/vp9/simple_encode.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP9_SIMPLE_ENCODE_H_
+#define VPX_VP9_SIMPLE_ENCODE_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+#include <vector>
+
+namespace vp9 {
+
+enum FrameType {
+ kKeyFrame = 0,
+ kInterFrame,
+ kAlternateReference,
+};
+
+struct EncodeFrameResult {
+ int show_idx;
+ FrameType frame_type;
+ size_t coding_data_bit_size;
+ size_t coding_data_byte_size;
+ // The EncodeFrame will allocate a buffer, write the coding data into the
+ // buffer and give the ownership of the buffer to coding_data.
+ std::unique_ptr<unsigned char[]> coding_data;
+ double psnr;
+ uint64_t sse;
+ int quantize_index;
+};
+
+class SimpleEncode {
+ public:
+ SimpleEncode(int frame_width, int frame_height, int frame_rate_num,
+ int frame_rate_den, int target_bitrate, int num_frames,
+ const char *infile_path);
+ ~SimpleEncode();
+ SimpleEncode(SimpleEncode &) = delete;
+ SimpleEncode &operator=(const SimpleEncode &) = delete;
+
+ // Makes encoder compute the first pass stats and store it internally for
+ // future encode.
+ void ComputeFirstPassStats();
+
+ // Outputs the first pass stats represented by a 2-D vector.
+ // One can use the frame index at first dimension to retrieve the stats for
+ // each video frame. The stats of each video frame is a vector of 25 double
+ // values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h
+ std::vector<std::vector<double>> ObserveFirstPassStats();
+
+ // Initializes the encoder for actual encoding.
+ // This function should be called after ComputeFirstPassStats().
+ void StartEncode();
+
+ // Frees the encoder.
+ // This function should be called after StartEncode() or EncodeFrame().
+ void EndEncode();
+
+ // Given a key_frame_index, computes this key frame group's size.
+ // The key frame group size includes one key frame plus the number of
+ // following inter frames. Note that the key frame group size only counts the
+ // show frames. The number of no show frames like alternate references are not
+ // counted.
+ int GetKeyFrameGroupSize(int key_frame_index) const;
+
+ // Encodes a frame
+ // This function should be called after StartEncode() and before EndEncode().
+ void EncodeFrame(EncodeFrameResult *encode_frame_result);
+
+ // Encodes a frame with a specific quantize index.
+ // This function should be called after StartEncode() and before EndEncode().
+ void EncodeFrameWithQuantizeIndex(EncodeFrameResult *encode_frame_result,
+ int quantize_index);
+
+ // Gets the number of coding frames for the video. The coding frames include
+ // show frame and no show frame.
+ // This function should be called after ComputeFirstPassStats().
+ int GetCodingFrameNum() const;
+
+ private:
+ class EncodeImpl;
+
+ int frame_width_;
+ int frame_height_;
+ int frame_rate_num_;
+ int frame_rate_den_;
+ int target_bitrate_;
+ int num_frames_;
+ std::FILE *file_;
+ std::unique_ptr<EncodeImpl> impl_ptr_;
+};
+
+} // namespace vp9
+
+#endif // VPX_VP9_SIMPLE_ENCODE_H_
diff --git a/libvpx/vp9/vp9_common.mk b/libvpx/vp9/vp9_common.mk
index c9a55669e..5ef2f891a 100644
--- a/libvpx/vp9/vp9_common.mk
+++ b/libvpx/vp9/vp9_common.mk
@@ -10,6 +10,7 @@
VP9_COMMON_SRCS-yes += vp9_common.mk
VP9_COMMON_SRCS-yes += vp9_iface_common.h
+VP9_COMMON_SRCS-yes += vp9_iface_common.c
VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
VP9_COMMON_SRCS-yes += common/vp9_blockd.c
diff --git a/libvpx/vp9/vp9_cx_iface.c b/libvpx/vp9/vp9_cx_iface.c
index 45e03f2de..f415e50f7 100644
--- a/libvpx/vp9/vp9_cx_iface.c
+++ b/libvpx/vp9/vp9_cx_iface.c
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_encoder.h"
+#include "vpx_dsp/psnr.h"
#include "vpx_ports/vpx_once.h"
#include "vpx_ports/system_state.h"
#include "vpx_util/vpx_timestamp.h"
@@ -20,10 +21,14 @@
#include "./vpx_version.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vpx/vp8cx.h"
+#include "vp9/common/vp9_alloccommon.h"
+#include "vp9/vp9_cx_iface.h"
#include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/encoder/vp9_lookahead.h"
+#include "vp9/vp9_cx_iface.h"
#include "vp9/vp9_iface_common.h"
-struct vp9_extracfg {
+typedef struct vp9_extracfg {
int cpu_used; // available cpu percentage in 1/16
unsigned int enable_auto_alt_ref;
unsigned int noise_sensitivity;
@@ -55,7 +60,7 @@ struct vp9_extracfg {
int render_height;
unsigned int row_mt;
unsigned int motion_vector_unit_test;
-};
+} vp9_extracfg;
static struct vp9_extracfg default_extra_cfg = {
0, // cpu_used
@@ -466,6 +471,15 @@ static void config_target_level(VP9EncoderConfig *oxcf) {
}
}
+static vpx_rational64_t get_g_timebase_in_ts(vpx_rational_t g_timebase) {
+ vpx_rational64_t g_timebase_in_ts;
+ g_timebase_in_ts.den = g_timebase.den;
+ g_timebase_in_ts.num = g_timebase.num;
+ g_timebase_in_ts.num *= TICKS_PER_SEC;
+ reduce_ratio(&g_timebase_in_ts);
+ return g_timebase_in_ts;
+}
+
static vpx_codec_err_t set_encoder_config(
VP9EncoderConfig *oxcf, const vpx_codec_enc_cfg_t *cfg,
const struct vp9_extracfg *extra_cfg) {
@@ -477,9 +491,13 @@ static vpx_codec_err_t set_encoder_config(
oxcf->height = cfg->g_h;
oxcf->bit_depth = cfg->g_bit_depth;
oxcf->input_bit_depth = cfg->g_input_bit_depth;
+ // TODO(angiebird): Figure out if we can just use g_timebase to indicate the
+ // inverse of framerate
// guess a frame rate if out of whack, use 30
oxcf->init_framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num;
if (oxcf->init_framerate > 180) oxcf->init_framerate = 30;
+ oxcf->g_timebase = cfg->g_timebase;
+ oxcf->g_timebase_in_ts = get_g_timebase_in_ts(oxcf->g_timebase);
oxcf->mode = GOOD;
@@ -539,10 +557,16 @@ static vpx_codec_err_t set_encoder_config(
oxcf->speed = abs(extra_cfg->cpu_used);
oxcf->encode_breakout = extra_cfg->static_thresh;
oxcf->enable_auto_arf = extra_cfg->enable_auto_alt_ref;
- oxcf->noise_sensitivity = extra_cfg->noise_sensitivity;
+ if (oxcf->bit_depth == VPX_BITS_8) {
+ oxcf->noise_sensitivity = extra_cfg->noise_sensitivity;
+ } else {
+ // Disable denoiser for high bitdepth since vp9_denoiser_filter only works
+ // for 8 bits.
+ oxcf->noise_sensitivity = 0;
+ }
oxcf->sharpness = extra_cfg->sharpness;
- oxcf->two_pass_stats_in = cfg->rc_twopass_stats_in;
+ vp9_set_first_pass_stats(oxcf, &cfg->rc_twopass_stats_in);
#if CONFIG_FP_MB_STATS
oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in;
@@ -611,40 +635,7 @@ static vpx_codec_err_t set_encoder_config(
}
if (get_level_index(oxcf->target_level) >= 0) config_target_level(oxcf);
- /*
- printf("Current VP9 Settings: \n");
- printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
- printf("target_level: %d\n", oxcf->target_level);
- printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity);
- printf("sharpness: %d\n", oxcf->sharpness);
- printf("cpu_used: %d\n", oxcf->cpu_used);
- printf("Mode: %d\n", oxcf->mode);
- printf("auto_key: %d\n", oxcf->auto_key);
- printf("key_freq: %d\n", oxcf->key_freq);
- printf("end_usage: %d\n", oxcf->end_usage);
- printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct);
- printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct);
- printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level);
- printf("optimal_buffer_level: %d\n", oxcf->optimal_buffer_level);
- printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size);
- printf("fixed_q: %d\n", oxcf->fixed_q);
- printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q);
- printf("best_allowed_q: %d\n", oxcf->best_allowed_q);
- printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling);
- printf("scaled_frame_width: %d\n", oxcf->scaled_frame_width);
- printf("scaled_frame_height: %d\n", oxcf->scaled_frame_height);
- printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias);
- printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
- printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
- printf("vbr_corpus_complexity: %d\n", oxcf->vbr_corpus_complexity);
- printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
- printf("enable_auto_arf: %d\n", oxcf->enable_auto_arf);
- printf("Version: %d\n", oxcf->Version);
- printf("encode_breakout: %d\n", oxcf->encode_breakout);
- printf("error resilient: %d\n", oxcf->error_resilient_mode);
- printf("frame parallel detokenization: %d\n",
- oxcf->frame_parallel_decoding_mode);
- */
+ // vp9_dump_encoder_config(oxcf);
return VPX_CODEC_OK;
}
@@ -935,10 +926,9 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
if (res == VPX_CODEC_OK) {
priv->pts_offset_initialized = 0;
- priv->timestamp_ratio.den = priv->cfg.g_timebase.den;
- priv->timestamp_ratio.num = (int64_t)priv->cfg.g_timebase.num;
- priv->timestamp_ratio.num *= TICKS_PER_SEC;
- reduce_ratio(&priv->timestamp_ratio);
+ // TODO(angiebird): Replace priv->timestamp_ratio by
+ // oxcf->g_timebase_in_ts
+ priv->timestamp_ratio = get_g_timebase_in_ts(priv->cfg.g_timebase);
set_encoder_config(&priv->oxcf, &priv->cfg, &priv->extra_cfg);
#if CONFIG_VP9_HIGHBITDEPTH
@@ -946,10 +936,7 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
(ctx->init_flags & VPX_CODEC_USE_HIGHBITDEPTH) ? 1 : 0;
#endif
priv->cpi = vp9_create_compressor(&priv->oxcf, priv->buffer_pool);
- if (priv->cpi == NULL)
- res = VPX_CODEC_MEM_ERROR;
- else
- priv->cpi->output_pkt_list = &priv->pkt_list.head;
+ if (priv->cpi == NULL) res = VPX_CODEC_MEM_ERROR;
}
}
@@ -1067,18 +1054,6 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) {
return index_sz;
}
-static int64_t timebase_units_to_ticks(const vpx_rational64_t *timestamp_ratio,
- int64_t n) {
- return n * timestamp_ratio->num / timestamp_ratio->den;
-}
-
-static int64_t ticks_to_timebase_units(const vpx_rational64_t *timestamp_ratio,
- int64_t n) {
- int64_t round = timestamp_ratio->num / 2;
- if (round > 0) --round;
- return (n * timestamp_ratio->den + round) / timestamp_ratio->num;
-}
-
static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
unsigned int lib_flags) {
vpx_codec_frame_flags_t flags = lib_flags << 16;
@@ -1096,6 +1071,27 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
return flags;
}
+static INLINE vpx_codec_cx_pkt_t get_psnr_pkt(const PSNR_STATS *psnr) {
+ vpx_codec_cx_pkt_t pkt;
+ pkt.kind = VPX_CODEC_PSNR_PKT;
+ pkt.data.psnr = *psnr;
+ return pkt;
+}
+
+#if !CONFIG_REALTIME_ONLY
+static INLINE vpx_codec_cx_pkt_t
+get_first_pass_stats_pkt(FIRSTPASS_STATS *stats) {
+ // WARNING: This function assumes that stats will
+ // exist and not be changed until the packet is processed
+ // TODO(angiebird): Refactor the code to avoid using the assumption.
+ vpx_codec_cx_pkt_t pkt;
+ pkt.kind = VPX_CODEC_STATS_PKT;
+ pkt.data.twopass_stats.buf = stats;
+ pkt.data.twopass_stats.sz = sizeof(*stats);
+ return pkt;
+}
+#endif
+
const size_t kMinCompressedSize = 8192;
static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
const vpx_image_t *img,
@@ -1109,19 +1105,11 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
VP9_COMP *const cpi = ctx->cpi;
const vpx_rational64_t *const timestamp_ratio = &ctx->timestamp_ratio;
size_t data_sz;
+ vpx_codec_cx_pkt_t pkt;
+ memset(&pkt, 0, sizeof(pkt));
if (cpi == NULL) return VPX_CODEC_INVALID_PARAM;
- if (cpi->oxcf.pass == 2 && cpi->level_constraint.level_index >= 0 &&
- !cpi->level_constraint.rc_config_updated) {
- const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- TWO_PASS *const twopass = &cpi->twopass;
- FIRSTPASS_STATS *stats = &twopass->total_stats;
- twopass->bits_left =
- (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
- cpi->level_constraint.rc_config_updated = 1;
- }
-
if (img != NULL) {
res = validate_img(ctx, img);
if (res == VPX_CODEC_OK) {
@@ -1223,92 +1211,135 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
}
}
- while (cx_data_sz >= ctx->cx_data_sz / 2 &&
- -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
- &dst_time_stamp, &dst_end_time_stamp,
- !img)) {
- if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) {
- vpx_codec_cx_pkt_t pkt;
-
- // Pack invisible frames with the next visible frame
- if (!cpi->common.show_frame ||
- (cpi->use_svc &&
- cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)) {
- if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
- ctx->pending_cx_data_sz += size;
- if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
- ctx->pending_frame_magnitude |= size;
- cx_data += size;
- cx_data_sz -= size;
+ if (cpi->oxcf.pass == 1 && !cpi->use_svc) {
+#if !CONFIG_REALTIME_ONLY
+ // compute first pass stats
+ if (img) {
+ int ret;
+ ENCODE_FRAME_RESULT encode_frame_result;
+ vpx_codec_cx_pkt_t fps_pkt;
+ // TODO(angiebird): Call vp9_first_pass directly
+ ret = vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
+ &dst_time_stamp, &dst_end_time_stamp,
+ !img, &encode_frame_result);
+ assert(size == 0); // There is no compressed data in the first pass
+ (void)ret;
+ assert(ret == 0);
+ fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.this_frame_stats);
+ vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt);
+ } else {
+ if (!cpi->twopass.first_pass_done) {
+ vpx_codec_cx_pkt_t fps_pkt;
+ vp9_end_first_pass(cpi);
+ fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.total_stats);
+ vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt);
+ }
+ }
+#else // !CONFIG_REALTIME_ONLY
+ assert(0);
+#endif // !CONFIG_REALTIME_ONLY
+ } else {
+ ENCODE_FRAME_RESULT encode_frame_result;
+ while (cx_data_sz >= ctx->cx_data_sz / 2 &&
+ -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
+ &dst_time_stamp, &dst_end_time_stamp,
+ !img, &encode_frame_result)) {
+ // Pack psnr pkt
+ if (size > 0 && !cpi->use_svc) {
+ // TODO(angiebird): Figure out why we don't need psnr pkt when
+ // use_svc is on
+ PSNR_STATS psnr;
+ if (vp9_get_psnr(cpi, &psnr)) {
+ vpx_codec_cx_pkt_t psnr_pkt = get_psnr_pkt(&psnr);
+ vpx_codec_pkt_list_add(&ctx->pkt_list.head, &psnr_pkt);
+ }
+ }
+
+ if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) {
+ // Pack invisible frames with the next visible frame
+ if (!cpi->common.show_frame ||
+ (cpi->use_svc && cpi->svc.spatial_layer_id <
+ cpi->svc.number_spatial_layers - 1)) {
+ if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
+ ctx->pending_cx_data_sz += size;
+ if (size)
+ ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+ ctx->pending_frame_magnitude |= size;
+ cx_data += size;
+ cx_data_sz -= size;
+ pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
+ pkt.data.frame.height[cpi->svc.spatial_layer_id] =
+ cpi->common.height;
+ pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
+ 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
+
+ if (ctx->output_cx_pkt_cb.output_cx_pkt) {
+ pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+ pkt.data.frame.pts =
+ ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
+ ctx->pts_offset;
+ pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
+ timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
+ pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+ pkt.data.frame.buf = ctx->pending_cx_data;
+ pkt.data.frame.sz = size;
+ ctx->pending_cx_data = NULL;
+ ctx->pending_cx_data_sz = 0;
+ ctx->pending_frame_count = 0;
+ ctx->pending_frame_magnitude = 0;
+ ctx->output_cx_pkt_cb.output_cx_pkt(
+ &pkt, ctx->output_cx_pkt_cb.user_priv);
+ }
+ continue;
+ }
+
+ // Add the frame packet to the list of returned packets.
+ pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+ pkt.data.frame.pts =
+ ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
+ ctx->pts_offset;
+ pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
+ timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
+ pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
- if (ctx->output_cx_pkt_cb.output_cx_pkt) {
- pkt.kind = VPX_CODEC_CX_FRAME_PKT;
- pkt.data.frame.pts =
- ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
- ctx->pts_offset;
- pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
- timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
- pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+ if (ctx->pending_cx_data) {
+ if (size)
+ ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+ ctx->pending_frame_magnitude |= size;
+ ctx->pending_cx_data_sz += size;
+ // write the superframe only for the case when
+ if (!ctx->output_cx_pkt_cb.output_cx_pkt)
+ size += write_superframe_index(ctx);
pkt.data.frame.buf = ctx->pending_cx_data;
- pkt.data.frame.sz = size;
+ pkt.data.frame.sz = ctx->pending_cx_data_sz;
ctx->pending_cx_data = NULL;
ctx->pending_cx_data_sz = 0;
ctx->pending_frame_count = 0;
ctx->pending_frame_magnitude = 0;
+ } else {
+ pkt.data.frame.buf = cx_data;
+ pkt.data.frame.sz = size;
+ }
+ pkt.data.frame.partition_id = -1;
+
+ if (ctx->output_cx_pkt_cb.output_cx_pkt)
ctx->output_cx_pkt_cb.output_cx_pkt(
&pkt, ctx->output_cx_pkt_cb.user_priv);
- }
- continue;
- }
+ else
+ vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
- // Add the frame packet to the list of returned packets.
- pkt.kind = VPX_CODEC_CX_FRAME_PKT;
- pkt.data.frame.pts =
- ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
- ctx->pts_offset;
- pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
- timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
- pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
- pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
- pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
- pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
- 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
-
- if (ctx->pending_cx_data) {
- if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
- ctx->pending_frame_magnitude |= size;
- ctx->pending_cx_data_sz += size;
- // write the superframe only for the case when
- if (!ctx->output_cx_pkt_cb.output_cx_pkt)
- size += write_superframe_index(ctx);
- pkt.data.frame.buf = ctx->pending_cx_data;
- pkt.data.frame.sz = ctx->pending_cx_data_sz;
- ctx->pending_cx_data = NULL;
- ctx->pending_cx_data_sz = 0;
- ctx->pending_frame_count = 0;
- ctx->pending_frame_magnitude = 0;
- } else {
- pkt.data.frame.buf = cx_data;
- pkt.data.frame.sz = size;
- }
- pkt.data.frame.partition_id = -1;
-
- if (ctx->output_cx_pkt_cb.output_cx_pkt)
- ctx->output_cx_pkt_cb.output_cx_pkt(&pkt,
- ctx->output_cx_pkt_cb.user_priv);
- else
- vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
-
- cx_data += size;
- cx_data_sz -= size;
- if (is_one_pass_cbr_svc(cpi) &&
- (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
- // Encoded all spatial layers; exit loop.
- break;
+ cx_data += size;
+ cx_data_sz -= size;
+ if (is_one_pass_cbr_svc(cpi) &&
+ (cpi->svc.spatial_layer_id ==
+ cpi->svc.number_spatial_layers - 1)) {
+ // Encoded all spatial layers; exit loop.
+ break;
+ }
}
}
}
@@ -1765,7 +1796,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
VPX_VBR, // rc_end_usage
{ NULL, 0 }, // rc_twopass_stats_in
{ NULL, 0 }, // rc_firstpass_mb_stats_in
- 256, // rc_target_bandwidth
+ 256, // rc_target_bitrate
0, // rc_min_quantizer
63, // rc_max_quantizer
25, // rc_undershoot_pct
@@ -1831,3 +1862,222 @@ CODEC_INTERFACE(vpx_codec_vp9_cx) = {
NULL // vpx_codec_enc_mr_get_mem_loc_fn_t
}
};
+
+static vpx_codec_enc_cfg_t get_enc_cfg(int frame_width, int frame_height,
+ vpx_rational_t frame_rate,
+ int target_bitrate,
+ vpx_enc_pass enc_pass) {
+ vpx_codec_enc_cfg_t enc_cfg = encoder_usage_cfg_map[0].cfg;
+ enc_cfg.g_w = frame_width;
+ enc_cfg.g_h = frame_height;
+ enc_cfg.rc_target_bitrate = target_bitrate;
+ enc_cfg.g_pass = enc_pass;
+ // g_timebase is the inverse of frame_rate
+ enc_cfg.g_timebase.num = frame_rate.den;
+ enc_cfg.g_timebase.den = frame_rate.num;
+ return enc_cfg;
+}
+
+static vp9_extracfg get_extra_cfg() {
+ vp9_extracfg extra_cfg = default_extra_cfg;
+ return extra_cfg;
+}
+
+VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
+ vpx_rational_t frame_rate,
+ int target_bitrate,
+ vpx_enc_pass enc_pass) {
+ /* This function will generate the same VP9EncoderConfig used by the
+ * vpxenc command given below.
+ * The configs in the vpxenc command correspond to parameters of
+ * vp9_get_encoder_config() as follows.
+ *
+ * WIDTH: frame_width
+ * HEIGHT: frame_height
+ * FPS: frame_rate
+ * BITRATE: target_bitrate
+ *
+ * INPUT, OUTPUT, LIMIT will not affect VP9EncoderConfig
+ *
+ * vpxenc command:
+ * INPUT=bus_cif.y4m
+ * OUTPUT=output.webm
+ * WIDTH=352
+ * HEIGHT=288
+ * BITRATE=600
+ * FPS=30/1
+ * LIMIT=150
+ * ./vpxenc --limit=$LIMIT --width=$WIDTH --height=$HEIGHT --fps=$FPS
+ * --lag-in-frames=25 \
+ * --codec=vp9 --good --cpu-used=0 --threads=0 --profile=0 \
+ * --min-q=0 --max-q=63 --auto-alt-ref=1 --passes=2 --kf-max-dist=150 \
+ * --kf-min-dist=0 --drop-frame=0 --static-thresh=0 --bias-pct=50 \
+ * --minsection-pct=0 --maxsection-pct=150 --arnr-maxframes=7 --psnr \
+ * --arnr-strength=5 --sharpness=0 --undershoot-pct=100 --overshoot-pct=100 \
+ * --frame-parallel=0 --tile-columns=0 --cpu-used=0 --end-usage=vbr \
+ * --target-bitrate=$BITRATE -o $OUTPUT $INPUT
+ */
+
+ VP9EncoderConfig oxcf;
+ vp9_extracfg extra_cfg = get_extra_cfg();
+ vpx_codec_enc_cfg_t enc_cfg = get_enc_cfg(
+ frame_width, frame_height, frame_rate, target_bitrate, enc_pass);
+ set_encoder_config(&oxcf, &enc_cfg, &extra_cfg);
+
+ // These settings are made to match the settings of the vpxenc command.
+ oxcf.key_freq = 150;
+ oxcf.under_shoot_pct = 100;
+ oxcf.over_shoot_pct = 100;
+ oxcf.max_threads = 0;
+ oxcf.tile_columns = 0;
+ oxcf.frame_parallel_decoding_mode = 0;
+ oxcf.two_pass_vbrmax_section = 150;
+ return oxcf;
+}
+
+#define DUMP_STRUCT_VALUE(struct, value) \
+ printf(#value " %" PRId64 "\n", (int64_t)(struct)->value)
+
+void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf) {
+ DUMP_STRUCT_VALUE(oxcf, profile);
+ DUMP_STRUCT_VALUE(oxcf, bit_depth);
+ DUMP_STRUCT_VALUE(oxcf, width);
+ DUMP_STRUCT_VALUE(oxcf, height);
+ DUMP_STRUCT_VALUE(oxcf, input_bit_depth);
+ DUMP_STRUCT_VALUE(oxcf, init_framerate);
+ // TODO(angiebird): dump g_timebase
+ // TODO(angiebird): dump g_timebase_in_ts
+
+ DUMP_STRUCT_VALUE(oxcf, target_bandwidth);
+
+ DUMP_STRUCT_VALUE(oxcf, noise_sensitivity);
+ DUMP_STRUCT_VALUE(oxcf, sharpness);
+ DUMP_STRUCT_VALUE(oxcf, speed);
+ DUMP_STRUCT_VALUE(oxcf, rc_max_intra_bitrate_pct);
+ DUMP_STRUCT_VALUE(oxcf, rc_max_inter_bitrate_pct);
+ DUMP_STRUCT_VALUE(oxcf, gf_cbr_boost_pct);
+
+ DUMP_STRUCT_VALUE(oxcf, mode);
+ DUMP_STRUCT_VALUE(oxcf, pass);
+
+ // Key Framing Operations
+ DUMP_STRUCT_VALUE(oxcf, auto_key);
+ DUMP_STRUCT_VALUE(oxcf, key_freq);
+
+ DUMP_STRUCT_VALUE(oxcf, lag_in_frames);
+
+ // ----------------------------------------------------------------
+ // DATARATE CONTROL OPTIONS
+
+ // vbr, cbr, constrained quality or constant quality
+ DUMP_STRUCT_VALUE(oxcf, rc_mode);
+
+ // buffer targeting aggressiveness
+ DUMP_STRUCT_VALUE(oxcf, under_shoot_pct);
+ DUMP_STRUCT_VALUE(oxcf, over_shoot_pct);
+
+ // buffering parameters
+ // TODO(angiebird): dump starting_buffer_level_ms
+ // TODO(angiebird): dump optimal_buffer_level_ms
+ // TODO(angiebird): dump maximum_buffer_size_ms
+
+ // Frame drop threshold.
+ DUMP_STRUCT_VALUE(oxcf, drop_frames_water_mark);
+
+ // controlling quality
+ DUMP_STRUCT_VALUE(oxcf, fixed_q);
+ DUMP_STRUCT_VALUE(oxcf, worst_allowed_q);
+ DUMP_STRUCT_VALUE(oxcf, best_allowed_q);
+ DUMP_STRUCT_VALUE(oxcf, cq_level);
+ DUMP_STRUCT_VALUE(oxcf, aq_mode);
+
+ // Special handling of Adaptive Quantization for AltRef frames
+ DUMP_STRUCT_VALUE(oxcf, alt_ref_aq);
+
+ // Internal frame size scaling.
+ DUMP_STRUCT_VALUE(oxcf, resize_mode);
+ DUMP_STRUCT_VALUE(oxcf, scaled_frame_width);
+ DUMP_STRUCT_VALUE(oxcf, scaled_frame_height);
+
+ // Enable feature to reduce the frame quantization every x frames.
+ DUMP_STRUCT_VALUE(oxcf, frame_periodic_boost);
+
+ // two pass datarate control
+ DUMP_STRUCT_VALUE(oxcf, two_pass_vbrbias);
+ DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmin_section);
+ DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmax_section);
+ DUMP_STRUCT_VALUE(oxcf, vbr_corpus_complexity);
+ // END DATARATE CONTROL OPTIONS
+ // ----------------------------------------------------------------
+
+ // Spatial and temporal scalability.
+ DUMP_STRUCT_VALUE(oxcf, ss_number_layers);
+ DUMP_STRUCT_VALUE(oxcf, ts_number_layers);
+
+ // Bitrate allocation for spatial layers.
+ // TODO(angiebird): dump layer_target_bitrate[VPX_MAX_LAYERS]
+ // TODO(angiebird): dump ss_target_bitrate[VPX_SS_MAX_LAYERS]
+ // TODO(angiebird): dump ss_enable_auto_arf[VPX_SS_MAX_LAYERS]
+ // TODO(angiebird): dump ts_rate_decimator[VPX_TS_MAX_LAYERS]
+
+ DUMP_STRUCT_VALUE(oxcf, enable_auto_arf);
+ DUMP_STRUCT_VALUE(oxcf, encode_breakout);
+ DUMP_STRUCT_VALUE(oxcf, error_resilient_mode);
+ DUMP_STRUCT_VALUE(oxcf, frame_parallel_decoding_mode);
+
+ DUMP_STRUCT_VALUE(oxcf, arnr_max_frames);
+ DUMP_STRUCT_VALUE(oxcf, arnr_strength);
+
+ DUMP_STRUCT_VALUE(oxcf, min_gf_interval);
+ DUMP_STRUCT_VALUE(oxcf, max_gf_interval);
+
+ DUMP_STRUCT_VALUE(oxcf, tile_columns);
+ DUMP_STRUCT_VALUE(oxcf, tile_rows);
+
+ DUMP_STRUCT_VALUE(oxcf, enable_tpl_model);
+
+ DUMP_STRUCT_VALUE(oxcf, max_threads);
+
+ DUMP_STRUCT_VALUE(oxcf, target_level);
+
+ // TODO(angiebird): dump two_pass_stats_in
+
+#if CONFIG_FP_MB_STATS
+ // TODO(angiebird): dump firstpass_mb_stats_in
+#endif
+
+ DUMP_STRUCT_VALUE(oxcf, tuning);
+ DUMP_STRUCT_VALUE(oxcf, content);
+#if CONFIG_VP9_HIGHBITDEPTH
+ DUMP_STRUCT_VALUE(oxcf, use_highbitdepth);
+#endif
+ DUMP_STRUCT_VALUE(oxcf, color_space);
+ DUMP_STRUCT_VALUE(oxcf, color_range);
+ DUMP_STRUCT_VALUE(oxcf, render_width);
+ DUMP_STRUCT_VALUE(oxcf, render_height);
+ DUMP_STRUCT_VALUE(oxcf, temporal_layering_mode);
+
+ DUMP_STRUCT_VALUE(oxcf, row_mt);
+ DUMP_STRUCT_VALUE(oxcf, motion_vector_unit_test);
+}
+
+FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf) {
+ FRAME_INFO frame_info;
+ int dummy;
+ frame_info.frame_width = oxcf->width;
+ frame_info.frame_height = oxcf->height;
+ frame_info.render_frame_width = oxcf->width;
+ frame_info.render_frame_height = oxcf->height;
+ frame_info.bit_depth = oxcf->bit_depth;
+ vp9_set_mi_size(&frame_info.mi_rows, &frame_info.mi_cols, &dummy,
+ frame_info.frame_width, frame_info.frame_height);
+ vp9_set_mb_size(&frame_info.mb_rows, &frame_info.mb_cols, &frame_info.num_mbs,
+ frame_info.mi_rows, frame_info.mi_cols);
+ // TODO(angiebird): Figure out how to get subsampling_x/y here
+ return frame_info;
+}
+
+void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf,
+ const vpx_fixed_buf_t *stats) {
+ oxcf->two_pass_stats_in = *stats;
+}
diff --git a/libvpx/vp9/vp9_cx_iface.h b/libvpx/vp9/vp9_cx_iface.h
new file mode 100644
index 000000000..08569fcc9
--- /dev/null
+++ b/libvpx/vp9/vp9_cx_iface.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP9_VP9_CX_IFACE_H_
+#define VPX_VP9_VP9_CX_IFACE_H_
+#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/common/vp9_onyxc_int.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
+ vpx_rational_t frame_rate,
+ int target_bitrate,
+ vpx_enc_pass enc_pass);
+
+void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf);
+
+FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf);
+
+static INLINE int64_t
+timebase_units_to_ticks(const vpx_rational64_t *timestamp_ratio, int64_t n) {
+ return n * timestamp_ratio->num / timestamp_ratio->den;
+}
+
+static INLINE int64_t
+ticks_to_timebase_units(const vpx_rational64_t *timestamp_ratio, int64_t n) {
+ int64_t round = timestamp_ratio->num / 2;
+ if (round > 0) --round;
+ return (n * timestamp_ratio->den + round) / timestamp_ratio->num;
+}
+
+void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf,
+ const vpx_fixed_buf_t *stats);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_VP9_VP9_CX_IFACE_H_
diff --git a/libvpx/vp9/vp9_iface_common.c b/libvpx/vp9/vp9_iface_common.c
new file mode 100644
index 000000000..74d08a587
--- /dev/null
+++ b/libvpx/vp9/vp9_iface_common.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file in the root of the source tree. An additional
+ * intellectual property rights grant can be found in the file PATENTS.
+ * All contributing project authors may be found in the AUTHORS file in
+ * the root of the source tree.
+ */
+
+#include "vp9/vp9_iface_common.h"
+void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
+ void *user_priv) {
+ /** vpx_img_wrap() doesn't allow specifying independent strides for
+ * the Y, U, and V planes, nor other alignment adjustments that
+ * might be representable by a YV12_BUFFER_CONFIG, so we just
+ * initialize all the fields.*/
+ int bps;
+ if (!yv12->subsampling_y) {
+ if (!yv12->subsampling_x) {
+ img->fmt = VPX_IMG_FMT_I444;
+ bps = 24;
+ } else {
+ img->fmt = VPX_IMG_FMT_I422;
+ bps = 16;
+ }
+ } else {
+ if (!yv12->subsampling_x) {
+ img->fmt = VPX_IMG_FMT_I440;
+ bps = 16;
+ } else {
+ img->fmt = VPX_IMG_FMT_I420;
+ bps = 12;
+ }
+ }
+ img->cs = yv12->color_space;
+ img->range = yv12->color_range;
+ img->bit_depth = 8;
+ img->w = yv12->y_stride;
+ img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
+ img->d_w = yv12->y_crop_width;
+ img->d_h = yv12->y_crop_height;
+ img->r_w = yv12->render_width;
+ img->r_h = yv12->render_height;
+ img->x_chroma_shift = yv12->subsampling_x;
+ img->y_chroma_shift = yv12->subsampling_y;
+ img->planes[VPX_PLANE_Y] = yv12->y_buffer;
+ img->planes[VPX_PLANE_U] = yv12->u_buffer;
+ img->planes[VPX_PLANE_V] = yv12->v_buffer;
+ img->planes[VPX_PLANE_ALPHA] = NULL;
+ img->stride[VPX_PLANE_Y] = yv12->y_stride;
+ img->stride[VPX_PLANE_U] = yv12->uv_stride;
+ img->stride[VPX_PLANE_V] = yv12->uv_stride;
+ img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) {
+ // vpx_image_t uses byte strides and a pointer to the first byte
+ // of the image.
+ img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH);
+ img->bit_depth = yv12->bit_depth;
+ img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer);
+ img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer);
+ img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer);
+ img->planes[VPX_PLANE_ALPHA] = NULL;
+ img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride;
+ img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride;
+ img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride;
+ img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ img->bps = bps;
+ img->user_priv = user_priv;
+ img->img_data = yv12->buffer_alloc;
+ img->img_data_owner = 0;
+ img->self_allocd = 0;
+}
+
+vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
+ YV12_BUFFER_CONFIG *yv12) {
+ yv12->y_buffer = img->planes[VPX_PLANE_Y];
+ yv12->u_buffer = img->planes[VPX_PLANE_U];
+ yv12->v_buffer = img->planes[VPX_PLANE_V];
+
+ yv12->y_crop_width = img->d_w;
+ yv12->y_crop_height = img->d_h;
+ yv12->render_width = img->r_w;
+ yv12->render_height = img->r_h;
+ yv12->y_width = img->d_w;
+ yv12->y_height = img->d_h;
+
+ yv12->uv_width =
+ img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width;
+ yv12->uv_height =
+ img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height;
+ yv12->uv_crop_width = yv12->uv_width;
+ yv12->uv_crop_height = yv12->uv_height;
+
+ yv12->y_stride = img->stride[VPX_PLANE_Y];
+ yv12->uv_stride = img->stride[VPX_PLANE_U];
+ yv12->color_space = img->cs;
+ yv12->color_range = img->range;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
+ // In vpx_image_t
+ // planes point to uint8 address of start of data
+ // stride counts uint8s to reach next row
+ // In YV12_BUFFER_CONFIG
+ // y_buffer, u_buffer, v_buffer point to uint16 address of data
+ // stride and border counts in uint16s
+ // This means that all the address calculations in the main body of code
+ // should work correctly.
+ // However, before we do any pixel operations we need to cast the address
+ // to a uint16 pointer and double its value.
+ yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer);
+ yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer);
+ yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer);
+ yv12->y_stride >>= 1;
+ yv12->uv_stride >>= 1;
+ yv12->flags = YV12_FLAG_HIGHBITDEPTH;
+ } else {
+ yv12->flags = 0;
+ }
+ yv12->border = (yv12->y_stride - img->w) / 2;
+#else
+ yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ yv12->subsampling_x = img->x_chroma_shift;
+ yv12->subsampling_y = img->y_chroma_shift;
+ return VPX_CODEC_OK;
+}
diff --git a/libvpx/vp9/vp9_iface_common.h b/libvpx/vp9/vp9_iface_common.h
index a1921db63..e646917c6 100644
--- a/libvpx/vp9/vp9_iface_common.h
+++ b/libvpx/vp9/vp9_iface_common.h
@@ -10,130 +10,24 @@
#ifndef VPX_VP9_VP9_IFACE_COMMON_H_
#define VPX_VP9_VP9_IFACE_COMMON_H_
+#include <assert.h>
#include "vpx_ports/mem.h"
+#include "vpx/vp8.h"
+#include "vpx_scale/yv12config.h"
+#include "common/vp9_enums.h"
-static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
- void *user_priv) {
- /** vpx_img_wrap() doesn't allow specifying independent strides for
- * the Y, U, and V planes, nor other alignment adjustments that
- * might be representable by a YV12_BUFFER_CONFIG, so we just
- * initialize all the fields.*/
- int bps;
- if (!yv12->subsampling_y) {
- if (!yv12->subsampling_x) {
- img->fmt = VPX_IMG_FMT_I444;
- bps = 24;
- } else {
- img->fmt = VPX_IMG_FMT_I422;
- bps = 16;
- }
- } else {
- if (!yv12->subsampling_x) {
- img->fmt = VPX_IMG_FMT_I440;
- bps = 16;
- } else {
- img->fmt = VPX_IMG_FMT_I420;
- bps = 12;
- }
- }
- img->cs = yv12->color_space;
- img->range = yv12->color_range;
- img->bit_depth = 8;
- img->w = yv12->y_stride;
- img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
- img->d_w = yv12->y_crop_width;
- img->d_h = yv12->y_crop_height;
- img->r_w = yv12->render_width;
- img->r_h = yv12->render_height;
- img->x_chroma_shift = yv12->subsampling_x;
- img->y_chroma_shift = yv12->subsampling_y;
- img->planes[VPX_PLANE_Y] = yv12->y_buffer;
- img->planes[VPX_PLANE_U] = yv12->u_buffer;
- img->planes[VPX_PLANE_V] = yv12->v_buffer;
- img->planes[VPX_PLANE_ALPHA] = NULL;
- img->stride[VPX_PLANE_Y] = yv12->y_stride;
- img->stride[VPX_PLANE_U] = yv12->uv_stride;
- img->stride[VPX_PLANE_V] = yv12->uv_stride;
- img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) {
- // vpx_image_t uses byte strides and a pointer to the first byte
- // of the image.
- img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH);
- img->bit_depth = yv12->bit_depth;
- img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer);
- img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer);
- img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer);
- img->planes[VPX_PLANE_ALPHA] = NULL;
- img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride;
- img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride;
- img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride;
- img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride;
- }
-#endif // CONFIG_VP9_HIGHBITDEPTH
- img->bps = bps;
- img->user_priv = user_priv;
- img->img_data = yv12->buffer_alloc;
- img->img_data_owner = 0;
- img->self_allocd = 0;
-}
-
-static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
- YV12_BUFFER_CONFIG *yv12) {
- yv12->y_buffer = img->planes[VPX_PLANE_Y];
- yv12->u_buffer = img->planes[VPX_PLANE_U];
- yv12->v_buffer = img->planes[VPX_PLANE_V];
-
- yv12->y_crop_width = img->d_w;
- yv12->y_crop_height = img->d_h;
- yv12->render_width = img->r_w;
- yv12->render_height = img->r_h;
- yv12->y_width = img->d_w;
- yv12->y_height = img->d_h;
+#ifdef __cplusplus
+extern "C" {
+#endif
- yv12->uv_width =
- img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width;
- yv12->uv_height =
- img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height;
- yv12->uv_crop_width = yv12->uv_width;
- yv12->uv_crop_height = yv12->uv_height;
+void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
+ void *user_priv);
- yv12->y_stride = img->stride[VPX_PLANE_Y];
- yv12->uv_stride = img->stride[VPX_PLANE_U];
- yv12->color_space = img->cs;
- yv12->color_range = img->range;
+vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
+ YV12_BUFFER_CONFIG *yv12);
-#if CONFIG_VP9_HIGHBITDEPTH
- if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
- // In vpx_image_t
- // planes point to uint8 address of start of data
- // stride counts uint8s to reach next row
- // In YV12_BUFFER_CONFIG
- // y_buffer, u_buffer, v_buffer point to uint16 address of data
- // stride and border counts in uint16s
- // This means that all the address calculations in the main body of code
- // should work correctly.
- // However, before we do any pixel operations we need to cast the address
- // to a uint16 ponter and double its value.
- yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer);
- yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer);
- yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer);
- yv12->y_stride >>= 1;
- yv12->uv_stride >>= 1;
- yv12->flags = YV12_FLAG_HIGHBITDEPTH;
- } else {
- yv12->flags = 0;
- }
- yv12->border = (yv12->y_stride - img->w) / 2;
-#else
- yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
-#endif // CONFIG_VP9_HIGHBITDEPTH
- yv12->subsampling_x = img->x_chroma_shift;
- yv12->subsampling_y = img->y_chroma_shift;
- return VPX_CODEC_OK;
-}
-
-static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
+static INLINE VP9_REFFRAME
+ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
switch (frame) {
case VP8_LAST_FRAME: return VP9_LAST_FLAG;
case VP8_GOLD_FRAME: return VP9_GOLD_FLAG;
@@ -142,4 +36,9 @@ static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
assert(0 && "Invalid Reference Frame");
return VP9_LAST_FLAG;
}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VPX_VP9_VP9_IFACE_COMMON_H_
diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk
index 736ff0170..ad774505c 100644
--- a/libvpx/vp9/vp9cx.mk
+++ b/libvpx/vp9/vp9cx.mk
@@ -16,6 +16,10 @@ VP9_CX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes)
VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
VP9_CX_SRCS-yes += vp9_cx_iface.c
+VP9_CX_SRCS-yes += vp9_cx_iface.h
+
+VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.cc
+VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.h
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
@@ -76,6 +80,8 @@ VP9_CX_SRCS-yes += encoder/vp9_resize.c
VP9_CX_SRCS-yes += encoder/vp9_resize.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.c
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.h
+VP9_CX_SRCS-$(CONFIG_NON_GREEDY_MV) += encoder/vp9_non_greedy_mv.c
+VP9_CX_SRCS-$(CONFIG_NON_GREEDY_MV) += encoder/vp9_non_greedy_mv.h
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
@@ -116,7 +122,7 @@ endif
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
-ifeq ($(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86_64),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
endif