47 files changed, 3557 insertions, 1892 deletions
diff --git a/libvpx/vp9/common/vp9_alloccommon.c b/libvpx/vp9/common/vp9_alloccommon.c
index 7345e259b..5702dca71 100644
--- a/libvpx/vp9/common/vp9_alloccommon.c
+++ b/libvpx/vp9/common/vp9_alloccommon.c
@@ -17,17 +17,26 @@
 #include "vp9/common/vp9_entropymv.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
-void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) {
+void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width,
+                     int height) {
   const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
   const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
+  *mi_cols = aligned_width >> MI_SIZE_LOG2;
+  *mi_rows = aligned_height >> MI_SIZE_LOG2;
+  *mi_stride = calc_mi_size(*mi_cols);
+}
 
-  cm->mi_cols = aligned_width >> MI_SIZE_LOG2;
-  cm->mi_rows = aligned_height >> MI_SIZE_LOG2;
-  cm->mi_stride = calc_mi_size(cm->mi_cols);
+void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows,
+                     int mi_cols) {
+  *mb_cols = (mi_cols + 1) >> 1;
+  *mb_rows = (mi_rows + 1) >> 1;
+  *mb_num = (*mb_rows) * (*mb_cols);
+}
 
-  cm->mb_cols = (cm->mi_cols + 1) >> 1;
-  cm->mb_rows = (cm->mi_rows + 1) >> 1;
-  cm->MBs = cm->mb_rows * cm->mb_cols;
+void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) {
+  vp9_set_mi_size(&cm->mi_rows, &cm->mi_cols, &cm->mi_stride, width, height);
+  vp9_set_mb_size(&cm->mb_rows, &cm->mb_cols, &cm->MBs, cm->mi_rows,
+                  cm->mi_cols);
 }
 
 static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) {
diff --git a/libvpx/vp9/common/vp9_alloccommon.h b/libvpx/vp9/common/vp9_alloccommon.h
index 8900038ea..90cbb093d 100644
--- a/libvpx/vp9/common/vp9_alloccommon.h
+++ b/libvpx/vp9/common/vp9_alloccommon.h
@@ -33,6 +33,11 @@ void vp9_free_postproc_buffers(struct VP9Common *cm);
 int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height);
 void vp9_free_state_buffers(struct VP9Common *cm);
 
+void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width,
+                     int height);
+void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows,
+                     int mi_cols);
+
 void vp9_set_mb_mi(struct VP9Common *cm, int width, int height);
 
 void vp9_swap_current_and_last_seg_map(struct VP9Common *cm);
diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h
index 2ddc0f121..6ef8127a5 100644
--- a/libvpx/vp9/common/vp9_blockd.h
+++ b/libvpx/vp9/common/vp9_blockd.h
@@ -60,6 +60,7 @@ typedef struct {
 #define GOLDEN_FRAME 2
 #define ALTREF_FRAME 3
 #define MAX_REF_FRAMES 4
+#define MAX_INTER_REF_FRAMES 3
 
 typedef int8_t MV_REFERENCE_FRAME;
 
diff --git a/libvpx/vp9/common/vp9_mv.h b/libvpx/vp9/common/vp9_mv.h
index 14dde7dd0..76f93cf0b 100644
--- a/libvpx/vp9/common/vp9_mv.h
+++ b/libvpx/vp9/common/vp9_mv.h
@@ -19,6 +19,8 @@
 extern "C" {
 #endif
 
+#define INVALID_MV 0x80008000
+
 typedef struct mv {
   int16_t row;
   int16_t col;
diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h
index 662b8ef5e..f3942a8f0 100644
--- a/libvpx/vp9/common/vp9_onyxc_int.h
+++ b/libvpx/vp9/common/vp9_onyxc_int.h
@@ -244,14 +244,6 @@ typedef struct VP9Common {
   int byte_alignment;
   int skip_loop_filter;
 
-  // Private data associated with the frame buffer callbacks.
-  void *cb_priv;
-  vpx_get_frame_buffer_cb_fn_t get_fb_cb;
-  vpx_release_frame_buffer_cb_fn_t release_fb_cb;
-
-  // Handles memory for the codec.
-  InternalFrameBufferList int_frame_buffers;
-
   // External BufferPool passed from outside.
   BufferPool *buffer_pool;
 
@@ -262,6 +254,34 @@ typedef struct VP9Common {
   int lf_row;
 } VP9_COMMON;
 
+typedef struct {
+  int frame_width;
+  int frame_height;
+  int render_frame_width;
+  int render_frame_height;
+  int mi_rows;
+  int mi_cols;
+  int mb_rows;
+  int mb_cols;
+  int num_mbs;
+  vpx_bit_depth_t bit_depth;
+} FRAME_INFO;
+
+static INLINE void init_frame_info(FRAME_INFO *frame_info,
+                                   const VP9_COMMON *cm) {
+  frame_info->frame_width = cm->width;
+  frame_info->frame_height = cm->height;
+  frame_info->render_frame_width = cm->render_width;
+  frame_info->render_frame_height = cm->render_height;
+  frame_info->mi_cols = cm->mi_cols;
+  frame_info->mi_rows = cm->mi_rows;
+  frame_info->mb_cols = cm->mb_cols;
+  frame_info->mb_rows = cm->mb_rows;
+  frame_info->num_mbs = cm->MBs;
+  frame_info->bit_depth = cm->bit_depth;
+  // TODO(angiebird): Figure out how to get subsampling_x/y here
+}
+
 static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) {
   if (index < 0 || index >= FRAME_BUFFERS) return NULL;
   if (cm->error.error_code != VPX_CODEC_OK) return NULL;
diff --git a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c
index 5373b0218..d2c8535b0 100644
--- a/libvpx/vp9/common/vp9_postproc.c
+++ b/libvpx/vp9/common/vp9_postproc.c
@@ -183,7 +183,8 @@ void vp9_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch, int rows, int cols,
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
+static void deblock_and_de_macro_block(VP9_COMMON *cm,
+                                       YV12_BUFFER_CONFIG *source,
                                        YV12_BUFFER_CONFIG *post, int q,
                                        int low_var_thresh, int flag,
                                        uint8_t *limits) {
@@ -216,7 +217,7 @@ static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
         source->uv_height, source->uv_width, ppl);
   } else {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-    vp9_deblock(source, post, q, limits);
+    vp9_deblock(cm, source, post, q, limits);
     vpx_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height,
                               post->y_width, q2mbl(q));
     vpx_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height,
@@ -226,8 +227,8 @@ static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }
 
-void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
-                 uint8_t *limits) {
+void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src,
+                 YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) {
   const int ppl =
       (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q + 0.0065 + 0.5);
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -252,9 +253,8 @@ void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
   } else {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
     int mbr;
-    const int mb_rows = src->y_height / 16;
-    const int mb_cols = src->y_width / 16;
-
+    const int mb_rows = cm->mb_rows;
+    const int mb_cols = cm->mb_cols;
     memset(limits, (unsigned char)ppl, 16 * mb_cols);
 
     for (mbr = 0; mbr < mb_rows; mbr++) {
@@ -276,9 +276,9 @@ void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }
 
-void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
-                 uint8_t *limits) {
-  vp9_deblock(src, dst, q, limits);
+void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src,
+                 YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) {
+  vp9_deblock(cm, src, dst, q, limits);
 }
 
 static void swap_mi_and_prev_mi(VP9_COMMON *cm) {
@@ -383,21 +383,21 @@ int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest,
       vpx_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int);
     }
     if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) {
-      deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf,
+      deblock_and_de_macro_block(cm, &cm->post_proc_buffer_int, ppbuf,
                                  q + (ppflags->deblocking_level - 5) * 10, 1, 0,
                                  cm->postproc_state.limits);
     } else if (flags & VP9D_DEBLOCK) {
-      vp9_deblock(&cm->post_proc_buffer_int, ppbuf, q,
+      vp9_deblock(cm, &cm->post_proc_buffer_int, ppbuf, q,
                   cm->postproc_state.limits);
     } else {
       vpx_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf);
     }
   } else if (flags & VP9D_DEMACROBLOCK) {
-    deblock_and_de_macro_block(cm->frame_to_show, ppbuf,
+    deblock_and_de_macro_block(cm, cm->frame_to_show, ppbuf,
                                q + (ppflags->deblocking_level - 5) * 10, 1, 0,
                                cm->postproc_state.limits);
   } else if (flags & VP9D_DEBLOCK) {
-    vp9_deblock(cm->frame_to_show, ppbuf, q, cm->postproc_state.limits);
+    vp9_deblock(cm, cm->frame_to_show, ppbuf, q, cm->postproc_state.limits);
   } else {
     vpx_yv12_copy_frame(cm->frame_to_show, ppbuf);
   }
diff --git a/libvpx/vp9/common/vp9_postproc.h b/libvpx/vp9/common/vp9_postproc.h
index 67efc1b4e..bbe3aed83 100644
--- a/libvpx/vp9/common/vp9_postproc.h
+++ b/libvpx/vp9/common/vp9_postproc.h
@@ -40,11 +40,11 @@ struct VP9Common;
 int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest,
                         vp9_ppflags_t *ppflags, int unscaled_width);
 
-void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
-                 uint8_t *limits);
+void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src,
+                 YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits);
 
-void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
-                 uint8_t *limits);
+void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src,
+                 YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c
index 04f41e6a3..ff59ff504 100644
--- a/libvpx/vp9/common/vp9_reconinter.c
+++ b/libvpx/vp9/common/vp9_reconinter.c
@@ -96,8 +96,8 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, int bw,
   const int spel_right = spel_left - SUBPEL_SHIFTS;
   const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS;
   const int spel_bottom = spel_top - SUBPEL_SHIFTS;
-  MV clamped_mv = { src_mv->row * (1 << (1 - ss_y)),
-                    src_mv->col * (1 << (1 - ss_x)) };
+  MV clamped_mv = { (short)(src_mv->row * (1 << (1 - ss_y))),
+                    (short)(src_mv->col * (1 << (1 - ss_x))) };
   assert(ss_x <= 1);
   assert(ss_y <= 1);
 
diff --git a/libvpx/vp9/common/vp9_thread_common.c b/libvpx/vp9/common/vp9_thread_common.c
index c79d9b7f0..b3d50162b 100644
--- a/libvpx/vp9/common/vp9_thread_common.c
+++ b/libvpx/vp9/common/vp9_thread_common.c
@@ -298,7 +298,10 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
         pthread_cond_init(&lf_sync->cond[i], NULL);
       }
     }
-    pthread_mutex_init(&lf_sync->lf_mutex, NULL);
+
+    CHECK_MEM_ERROR(cm, lf_sync->lf_mutex,
+                    vpx_malloc(sizeof(*lf_sync->lf_mutex)));
+    pthread_mutex_init(lf_sync->lf_mutex, NULL);
 
     CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex,
                     vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows));
@@ -339,47 +342,50 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
 
 // Deallocate lf synchronization related mutex and data
 void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
-  if (lf_sync != NULL) {
+  assert(lf_sync != NULL);
+
 #if CONFIG_MULTITHREAD
+  if (lf_sync->mutex != NULL) {
     int i;
-
-    if (lf_sync->mutex != NULL) {
-      for (i = 0; i < lf_sync->rows; ++i) {
-        pthread_mutex_destroy(&lf_sync->mutex[i]);
-      }
-      vpx_free(lf_sync->mutex);
+    for (i = 0; i < lf_sync->rows; ++i) {
+      pthread_mutex_destroy(&lf_sync->mutex[i]);
     }
-    if (lf_sync->cond != NULL) {
-      for (i = 0; i < lf_sync->rows; ++i) {
-        pthread_cond_destroy(&lf_sync->cond[i]);
-      }
-      vpx_free(lf_sync->cond);
+    vpx_free(lf_sync->mutex);
+  }
+  if (lf_sync->cond != NULL) {
+    int i;
+    for (i = 0; i < lf_sync->rows; ++i) {
+      pthread_cond_destroy(&lf_sync->cond[i]);
     }
-    if (lf_sync->recon_done_mutex != NULL) {
-      int i;
-      for (i = 0; i < lf_sync->rows; ++i) {
-        pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
-      }
-      vpx_free(lf_sync->recon_done_mutex);
+    vpx_free(lf_sync->cond);
+  }
+  if (lf_sync->recon_done_mutex != NULL) {
+    int i;
+    for (i = 0; i < lf_sync->rows; ++i) {
+      pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
     }
+    vpx_free(lf_sync->recon_done_mutex);
+  }
 
-    pthread_mutex_destroy(&lf_sync->lf_mutex);
-    if (lf_sync->recon_done_cond != NULL) {
-      int i;
-      for (i = 0; i < lf_sync->rows; ++i) {
-        pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
-      }
-      vpx_free(lf_sync->recon_done_cond);
+  if (lf_sync->lf_mutex != NULL) {
+    pthread_mutex_destroy(lf_sync->lf_mutex);
+    vpx_free(lf_sync->lf_mutex);
+  }
+  if (lf_sync->recon_done_cond != NULL) {
+    int i;
+    for (i = 0; i < lf_sync->rows; ++i) {
+      pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
     }
+    vpx_free(lf_sync->recon_done_cond);
+  }
 #endif  // CONFIG_MULTITHREAD
 
-    vpx_free(lf_sync->lfdata);
-    vpx_free(lf_sync->cur_sb_col);
-    vpx_free(lf_sync->num_tiles_done);
-    // clear the structure as the source of this call may be a resize in which
-    // case this call will be followed by an _alloc() which may fail.
-    vp9_zero(*lf_sync);
-  }
+  vpx_free(lf_sync->lfdata);
+  vpx_free(lf_sync->cur_sb_col);
+  vpx_free(lf_sync->num_tiles_done);
+  // clear the structure as the source of this call may be a resize in which
+  // case this call will be followed by an _alloc() which may fail.
+  vp9_zero(*lf_sync);
 }
 
 static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
@@ -390,7 +396,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
 #if CONFIG_MULTITHREAD
   const int tile_cols = 1 << cm->log2_tile_cols;
 
-  pthread_mutex_lock(&lf_sync->lf_mutex);
+  pthread_mutex_lock(lf_sync->lf_mutex);
   if (cm->lf_row < max_rows) {
     cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
     return_val = cm->lf_row;
@@ -401,7 +407,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
       cur_row += 1;
     }
   }
-  pthread_mutex_unlock(&lf_sync->lf_mutex);
+  pthread_mutex_unlock(lf_sync->lf_mutex);
 
   if (return_val == -1) return return_val;
 
@@ -411,7 +417,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
                       &lf_sync->recon_done_mutex[cur_row]);
   }
   pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]);
-  pthread_mutex_lock(&lf_sync->lf_mutex);
+  pthread_mutex_lock(lf_sync->lf_mutex);
   if (lf_sync->corrupted) {
     int row = return_val >> MI_BLOCK_SIZE_LOG2;
     pthread_mutex_lock(&lf_sync->mutex[row]);
@@ -420,7 +426,7 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
     pthread_mutex_unlock(&lf_sync->mutex[row]);
     return_val = -1;
   }
-  pthread_mutex_unlock(&lf_sync->lf_mutex);
+  pthread_mutex_unlock(lf_sync->lf_mutex);
 #else
   (void)lf_sync;
   if (cm->lf_row < max_rows) {
@@ -455,9 +461,9 @@ void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) {
 void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
                  int corrupted) {
 #if CONFIG_MULTITHREAD
-  pthread_mutex_lock(&lf_sync->lf_mutex);
+  pthread_mutex_lock(lf_sync->lf_mutex);
   lf_sync->corrupted |= corrupted;
-  pthread_mutex_unlock(&lf_sync->lf_mutex);
+  pthread_mutex_unlock(lf_sync->lf_mutex);
   pthread_mutex_lock(&lf_sync->recon_done_mutex[row]);
   lf_sync->num_tiles_done[row] += 1;
   if (num_tiles == lf_sync->num_tiles_done[row]) {
diff --git a/libvpx/vp9/common/vp9_thread_common.h b/libvpx/vp9/common/vp9_thread_common.h
index 94c9de659..5df0117f1 100644
--- a/libvpx/vp9/common/vp9_thread_common.h
+++ b/libvpx/vp9/common/vp9_thread_common.h
@@ -40,7 +40,7 @@ typedef struct VP9LfSyncData {
   int num_active_workers;  // number of scheduled workers.
 
 #if CONFIG_MULTITHREAD
-  pthread_mutex_t lf_mutex;
+  pthread_mutex_t *lf_mutex;
   pthread_mutex_t *recon_done_mutex;
   pthread_cond_t *recon_done_cond;
 #endif
diff --git a/libvpx/vp9/decoder/vp9_decodeframe.c b/libvpx/vp9/decoder/vp9_decodeframe.c
index 7d66cb2b2..e8b386994 100644
--- a/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -529,16 +529,15 @@ static void high_build_mc_border(const uint8_t *src8, int src_stride,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 #if CONFIG_VP9_HIGHBITDEPTH
-static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
-                               int x0, int y0, int b_w, int b_h,
-                               int frame_width, int frame_height,
+static void extend_and_predict(TileWorkerData *twd, const uint8_t *buf_ptr1,
+                               int pre_buf_stride, int x0, int y0, int b_w,
+                               int b_h, int frame_width, int frame_height,
                                int border_offset, uint8_t *const dst,
                                int dst_buf_stride, int subpel_x, int subpel_y,
                                const InterpKernel *kernel,
                                const struct scale_factors *sf, MACROBLOCKD *xd,
                                int w, int h, int ref, int xs, int ys) {
-  DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]);
-
+  uint16_t *mc_buf_high = twd->extend_and_predict_buf;
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, x0, y0,
                          b_w, b_h, frame_width, frame_height);
@@ -554,15 +553,15 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
   }
 }
 #else
-static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
-                               int x0, int y0, int b_w, int b_h,
-                               int frame_width, int frame_height,
+static void extend_and_predict(TileWorkerData *twd, const uint8_t *buf_ptr1,
+                               int pre_buf_stride, int x0, int y0, int b_w,
+                               int b_h, int frame_width, int frame_height,
                                int border_offset, uint8_t *const dst,
                                int dst_buf_stride, int subpel_x, int subpel_y,
                                const InterpKernel *kernel,
                                const struct scale_factors *sf, int w, int h,
                                int ref, int xs, int ys) {
-  DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
+  uint8_t *mc_buf = (uint8_t *)twd->extend_and_predict_buf;
   const uint8_t *buf_ptr;
 
   build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, x0, y0, b_w, b_h,
@@ -575,8 +574,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 static void dec_build_inter_predictors(
-    MACROBLOCKD *xd, int plane, int bw, int bh, int x, int y, int w, int h,
-    int mi_x, int mi_y, const InterpKernel *kernel,
+    TileWorkerData *twd, MACROBLOCKD *xd, int plane, int bw, int bh, int x,
+    int y, int w, int h, int mi_x, int mi_y, const InterpKernel *kernel,
     const struct scale_factors *sf, struct buf_2d *pre_buf,
     struct buf_2d *dst_buf, const MV *mv, RefCntBuffer *ref_frame_buf,
     int is_scaled, int ref) {
@@ -687,9 +686,9 @@ static void dec_build_inter_predictors(
       const int b_h = y1 - y0 + 1;
       const int border_offset = y_pad * 3 * b_w + x_pad * 3;
 
-      extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, frame_width,
-                         frame_height, border_offset, dst, dst_buf->stride,
-                         subpel_x, subpel_y, kernel, sf,
+      extend_and_predict(twd, buf_ptr1, buf_stride, x0, y0, b_w, b_h,
+                         frame_width, frame_height, border_offset, dst,
+                         dst_buf->stride, subpel_x, subpel_y, kernel, sf,
 #if CONFIG_VP9_HIGHBITDEPTH
                          xd,
 #endif
@@ -712,7 +711,8 @@ static void dec_build_inter_predictors(
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }
 
-static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
+static void dec_build_inter_predictors_sb(TileWorkerData *twd,
+                                          VP9Decoder *const pbi,
                                           MACROBLOCKD *xd, int mi_row,
                                           int mi_col) {
   int plane;
@@ -755,10 +755,10 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
         for (y = 0; y < num_4x4_h; ++y) {
           for (x = 0; x < num_4x4_w; ++x) {
             const MV mv = average_split_mvs(pd, mi, ref, i++);
-            dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 4 * x, 4 * y,
-                                       4, 4, mi_x, mi_y, kernel, sf, pre_buf,
-                                       dst_buf, &mv, ref_frame_buf, is_scaled,
-                                       ref);
+            dec_build_inter_predictors(twd, xd, plane, n4w_x4, n4h_x4, 4 * x,
+                                       4 * y, 4, 4, mi_x, mi_y, kernel, sf,
+                                       pre_buf, dst_buf, &mv, ref_frame_buf,
+                                       is_scaled, ref);
           }
         }
       }
@@ -772,7 +772,7 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
         const int n4w_x4 = 4 * num_4x4_w;
         const int n4h_x4 = 4 * num_4x4_h;
         struct buf_2d *const pre_buf = &pd->pre[ref];
-        dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4,
+        dec_build_inter_predictors(twd, xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4,
                                    n4h_x4, mi_x, mi_y, kernel, sf, pre_buf,
                                    dst_buf, &mv, ref_frame_buf, is_scaled, ref);
       }
@@ -964,7 +964,7 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
     }
   } else {
     // Prediction
-    dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
+    dec_build_inter_predictors_sb(twd, pbi, xd, mi_row, mi_col);
 #if CONFIG_MISMATCH_DEBUG
     {
       int plane;
@@ -1048,7 +1048,7 @@ static void recon_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
                         predict_and_reconstruct_intra_block_row_mt);
   } else {
     // Prediction
-    dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
+    dec_build_inter_predictors_sb(twd, pbi, xd, mi_row, mi_col);
 
     // Reconstruction
     if (!mi->skip) {
@@ -1733,9 +1733,9 @@ static int lpf_map_write_check(VP9LfSync *lf_sync, int row, int num_tile_cols) {
   int return_val = 0;
 #if CONFIG_MULTITHREAD
   int corrupted;
-  pthread_mutex_lock(&lf_sync->lf_mutex);
+  pthread_mutex_lock(lf_sync->lf_mutex);
   corrupted = lf_sync->corrupted;
-  pthread_mutex_unlock(&lf_sync->lf_mutex);
+  pthread_mutex_unlock(lf_sync->lf_mutex);
   if (!corrupted) {
     pthread_mutex_lock(&lf_sync->recon_done_mutex[row]);
     lf_sync->num_tiles_done[row] += 1;
@@ -1905,6 +1905,7 @@ static int row_decode_worker_hook(void *arg1, void *arg2) {
   LFWorkerData *lf_data = thread_data->lf_data;
   VP9LfSync *lf_sync = thread_data->lf_sync;
   volatile int corrupted = 0;
+  TileWorkerData *volatile tile_data_recon = NULL;
 
   while (!vp9_jobq_dequeue(&row_mt_worker_data->jobq, &job, sizeof(job), 1)) {
     int mi_col;
@@ -1921,9 +1922,10 @@ static int row_decode_worker_hook(void *arg1, void *arg2) {
     } else if (job.job_type == RECON_JOB) {
       const int cur_sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
       const int is_last_row = sb_rows - 1 == cur_sb_row;
-      TileWorkerData twd_recon;
-      TileWorkerData *const tile_data_recon = &twd_recon;
       int mi_col_start, mi_col_end;
+      if (!tile_data_recon)
+        CHECK_MEM_ERROR(cm, tile_data_recon,
+                        vpx_memalign(32, sizeof(TileWorkerData)));
 
       tile_data_recon->xd = pbi->mb;
       vp9_tile_init(&tile_data_recon->xd.tile, cm, 0, job.tile_col);
@@ -2006,6 +2008,7 @@ static int row_decode_worker_hook(void *arg1, void *arg2) {
     }
   }
 
+  vpx_free(tile_data_recon);
   return !corrupted;
 }
 
diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c
index 943fe478a..49c675394 100644
--- a/libvpx/vp9/decoder/vp9_decodemv.c
+++ b/libvpx/vp9/decoder/vp9_decodemv.c
@@ -444,17 +444,6 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   }
 }
 
-static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv,
-                                  int refmv_count) {
-  int i;
-
-  // Make sure all the candidates are properly clamped etc
-  for (i = 0; i < refmv_count; ++i) {
-    lower_mv_precision(&mvlist[i].as_mv, allow_hp);
-    *best_mv = mvlist[i];
-  }
-}
-
 // This macro is used to add a motion vector mv_ref list if it isn't
 // already in the list.  If it's the second motion vector or early_break
 // it will also skip all additional processing and jump to Done!
@@ -494,7 +483,7 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                             PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame,
                             const POSITION *const mv_ref_search,
                             int_mv *mv_ref_list, int mi_row, int mi_col,
-                            int block, int is_sub8x8) {
+                            int block) {
   const int *ref_sign_bias = cm->ref_frame_sign_bias;
   int i, refmv_count = 0;
   int different_ref_found = 0;
@@ -511,7 +500,7 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
   memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
 
   i = 0;
-  if (is_sub8x8) {
+  if (block >= 0) {
     // If the size < 8x8 we get the mv from the bmi substructure for the
     // nearest two blocks.
     for (i = 0; i < 2; ++i) {
@@ -628,19 +617,22 @@ static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
 
   assert(MAX_MV_REF_CANDIDATES == 2);
 
-  refmv_count =
-      dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
-                       mv_list, mi_row, mi_col, block, 1);
-
   switch (block) {
-    case 0: best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; break;
+    case 0:
+      refmv_count =
+          dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
+                           mv_list, mi_row, mi_col, block);
+      best_sub8x8->as_int = mv_list[refmv_count - 1].as_int;
+      break;
     case 1:
     case 2:
       if (b_mode == NEARESTMV) {
         best_sub8x8->as_int = bmi[0].as_mv[ref].as_int;
       } else {
+        dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
+                         mv_list, mi_row, mi_col, block);
         best_sub8x8->as_int = 0;
-        for (n = 0; n < refmv_count; ++n)
+        for (n = 0; n < 2; ++n)
           if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) {
             best_sub8x8->as_int = mv_list[n].as_int;
             break;
@@ -651,15 +643,20 @@ static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
       if (b_mode == NEARESTMV) {
         best_sub8x8->as_int = bmi[2].as_mv[ref].as_int;
       } else {
-        int_mv candidates[2 + MAX_MV_REF_CANDIDATES];
-        candidates[0] = bmi[1].as_mv[ref];
-        candidates[1] = bmi[0].as_mv[ref];
-        candidates[2] = mv_list[0];
-        candidates[3] = mv_list[1];
         best_sub8x8->as_int = 0;
-        for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n)
-          if (bmi[2].as_mv[ref].as_int != candidates[n].as_int) {
-            best_sub8x8->as_int = candidates[n].as_int;
+        if (bmi[2].as_mv[ref].as_int != bmi[1].as_mv[ref].as_int) {
+          best_sub8x8->as_int = bmi[1].as_mv[ref].as_int;
+          break;
+        }
+        if (bmi[2].as_mv[ref].as_int != bmi[0].as_mv[ref].as_int) {
+          best_sub8x8->as_int = bmi[0].as_mv[ref].as_int;
+          break;
+        }
+        dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
+                         mv_list, mi_row, mi_col, block);
+        for (n = 0; n < 2; ++n)
+          if (bmi[2].as_mv[ref].as_int != mv_list[n].as_int) {
+            best_sub8x8->as_int = mv_list[n].as_int;
             break;
           }
       }
@@ -715,26 +712,6 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
   } else {
     if (bsize >= BLOCK_8X8)
       mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
-    else
-      // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV.
-      // Setting mode to NEARESTMV forces the search to stop after the nearestmv
-      // has been found. After b_modes have been read, mode will be overwritten
-      // by the last b_mode.
-      mi->mode = NEARESTMV;
-
-    if (mi->mode != ZEROMV) {
-      for (ref = 0; ref < 1 + is_compound; ++ref) {
-        int_mv tmp_mvs[MAX_MV_REF_CANDIDATES];
-        const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
-        int refmv_count;
-
-        refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search,
-                                       tmp_mvs, mi_row, mi_col, -1, 0);
-
-        dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref],
-                              refmv_count);
-      }
-    }
   }
 
   mi->interp_filter = (cm->interp_filter == SWITCHABLE)
@@ -746,6 +723,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
     const int num_4x4_h = 1 << xd->bmode_blocks_hl;
     int idx, idy;
     PREDICTION_MODE b_mode;
+    int got_mv_refs_for_new = 0;
     int_mv best_sub8x8[2];
     const uint32_t invalid_mv = 0x80008000;
     // Initialize the 2nd element as even though it won't be used meaningfully
@@ -760,6 +738,18 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
           for (ref = 0; ref < 1 + is_compound; ++ref)
             append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref,
                                       mi_row, mi_col, &best_sub8x8[ref]);
+        } else if (b_mode == NEWMV && !got_mv_refs_for_new) {
+          for (ref = 0; ref < 1 + is_compound; ++ref) {
+            int_mv tmp_mvs[MAX_MV_REF_CANDIDATES];
+            const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
+
+            dec_find_mv_refs(cm, xd, NEWMV, frame, mv_ref_search, tmp_mvs,
+                             mi_row, mi_col, -1);
+
+            lower_mv_precision(&tmp_mvs[0].as_mv, allow_hp);
+            best_ref_mvs[ref] = tmp_mvs[0];
+            got_mv_refs_for_new = 1;
+          }
         }
 
         if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs,
@@ -777,6 +767,17 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
 
     copy_mv_pair(mi->mv, mi->bmi[3].as_mv);
   } else {
+    if (mi->mode != ZEROMV) {
+      for (ref = 0; ref < 1 + is_compound; ++ref) {
+        int_mv tmp_mvs[MAX_MV_REF_CANDIDATES];
+        const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
+        int refmv_count =
+            dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, tmp_mvs,
+                             mi_row, mi_col, -1);
+        lower_mv_precision(&tmp_mvs[refmv_count - 1].as_mv, allow_hp);
+        best_ref_mvs[ref] = tmp_mvs[refmv_count - 1];
+      }
+    }
     xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs,
                                 best_ref_mvs, is_compound, allow_hp, r);
   }
diff --git a/libvpx/vp9/decoder/vp9_decoder.h b/libvpx/vp9/decoder/vp9_decoder.h
index 4a22aa6b5..b0ef83c73 100644
--- a/libvpx/vp9/decoder/vp9_decoder.h
+++ b/libvpx/vp9/decoder/vp9_decoder.h
@@ -55,6 +55,7 @@ typedef struct TileWorkerData {
   DECLARE_ALIGNED(16, MACROBLOCKD, xd);
   /* dqcoeff are shared by all the planes. So planes must be decoded serially */
   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
+  DECLARE_ALIGNED(16, uint16_t, extend_and_predict_buf[80 * 2 * 80 * 2]);
   struct vpx_internal_error_info error_info;
 } TileWorkerData;
 
diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c
index e250a5a35..c2e6b3d54 100644
--- a/libvpx/vp9/decoder/vp9_detokenize.c
+++ b/libvpx/vp9/decoder/vp9_detokenize.c
@@ -243,9 +243,9 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #else
     if (read_bool(r, 128, &value, &count, &range)) {
-      dqcoeff[scan[c]] = -v;
+      dqcoeff[scan[c]] = (tran_low_t)-v;
     } else {
-      dqcoeff[scan[c]] = v;
+      dqcoeff[scan[c]] = (tran_low_t)v;
     }
 #endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
     ++c;
diff --git a/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c b/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
index 8b62b450c..d75a48179 100644
--- a/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -26,6 +26,22 @@
 #include "vpx_dsp/arm/mem_neon.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 
+static INLINE void calculate_dqcoeff_and_store(const int16x8_t qcoeff,
+                                               const int16x8_t dequant,
+                                               tran_low_t *dqcoeff) {
+  const int32x4_t dqcoeff_0 =
+      vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant));
+  const int32x4_t dqcoeff_1 =
+      vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant));
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  vst1q_s32(dqcoeff, dqcoeff_0);
+  vst1q_s32(dqcoeff + 4, dqcoeff_1);
+#else
+  vst1q_s16(dqcoeff, vcombine_s16(vmovn_s32(dqcoeff_0), vmovn_s32(dqcoeff_1)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
+
 void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
                           int skip_block, const int16_t *round_ptr,
                           const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
@@ -55,7 +71,8 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
     const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
     const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr);
     const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-    const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+    const int16x8_t v_abs = vabsq_s16(v_coeff);
+    const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
     const int32x4_t v_tmp_lo =
         vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
     const int32x4_t v_tmp_hi =
@@ -67,10 +84,9 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
     const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
     const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
     const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
-    const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+    calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr);
     v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
     store_s16q_to_tran_low(qcoeff_ptr, v_qcoeff);
-    store_s16q_to_tran_low(dqcoeff_ptr, v_dqcoeff);
     v_round = vmovq_n_s16(round_ptr[1]);
     v_quant = vmovq_n_s16(quant_ptr[1]);
     v_dequant = vmovq_n_s16(dequant_ptr[1]);
@@ -80,7 +96,8 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
     const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
     const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr + i);
     const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-    const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+    const int16x8_t v_abs = vabsq_s16(v_coeff);
+    const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
     const int32x4_t v_tmp_lo =
         vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
     const int32x4_t v_tmp_hi =
@@ -92,10 +109,9 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
     const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
     const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
     const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
-    const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+    calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr + i);
     v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
     store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff);
-    store_s16q_to_tran_low(dqcoeff_ptr + i, v_dqcoeff);
   }
 #ifdef __aarch64__
   *eob_ptr = vmaxvq_s16(v_eobmax_76543210);
@@ -146,9 +162,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
   const int16x8_t dequant_mask =
       vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));
 
-  int16x8_t qcoeff = vaddq_s16(coeff_abs, round);
+  int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);
   int32x4_t dqcoeff_0, dqcoeff_1;
-  int16x8_t dqcoeff;
   uint16x8_t eob_max;
   (void)scan;
   (void)count;
@@ -170,13 +185,17 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
   // Add 1 if negative to round towards zero because the C uses division.
   dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0));
   dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1));
-
-  dqcoeff = vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
+#if CONFIG_VP9_HIGHBITDEPTH
+  vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1));
+  vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1));
+#else
+  store_s16q_to_tran_low(dqcoeff_ptr, vcombine_s16(vshrn_n_s32(dqcoeff_0, 1),
+                                                   vshrn_n_s32(dqcoeff_1, 1)));
+#endif
 
   eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan);
 
   store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
-  store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
 
   iscan += 8;
   coeff_ptr += 8;
@@ -200,9 +219,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
       const int16x8_t dequant_mask =
           vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));
 
-      int16x8_t qcoeff = vaddq_s16(coeff_abs, round);
+      int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);
       int32x4_t dqcoeff_0, dqcoeff_1;
-      int16x8_t dqcoeff;
 
       qcoeff = vqdmulhq_s16(qcoeff, quant);
       qcoeff = veorq_s16(qcoeff, coeff_sign);
@@ -215,14 +233,19 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
       dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0));
       dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1));
 
-      dqcoeff =
-          vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
+#if CONFIG_VP9_HIGHBITDEPTH
+      vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1));
+      vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1));
+#else
+      store_s16q_to_tran_low(
+          dqcoeff_ptr,
+          vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)));
+#endif
 
       eob_max =
           vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan));
 
       store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
-      store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
 
       iscan += 8;
       coeff_ptr += 8;
diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
index adb12c10c..858a41654 100644
--- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -187,7 +187,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, MODE_INFO *const mi,
 
   // If this block is labeled for refresh, check if we should reset the
   // segment_id.
-  if (cyclic_refresh_segment_id_boosted(mi->segment_id)) {
+  if (cpi->sf.use_nonrd_pick_mode &&
+      cyclic_refresh_segment_id_boosted(mi->segment_id)) {
     mi->segment_id = refresh_this_block;
     // Reset segment_id if it will be skipped.
     if (skip) mi->segment_id = CR_SEGMENT_ID_BASE;
diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c
index d47b411fa..9eddf545e 100644
--- a/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -1214,8 +1214,8 @@ static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
 
   if (is_key_frame) return;
 
-  // For speed >= 8, avoid the chroma check if y_sad is above threshold.
-  if (cpi->oxcf.speed >= 8) {
+  // For speed > 8, avoid the chroma check if y_sad is above threshold.
+  if (cpi->oxcf.speed > 8) {
     if (y_sad > cpi->vbp_thresholds[1] &&
         (!cpi->noise_estimate.enabled ||
          vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium))
@@ -4248,13 +4248,21 @@ static int rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
   if (cpi->sf.prune_ref_frame_for_rect_partitions) {
     uint8_t used_frames;
     used_frames = ref_frames_used[0] | ref_frames_used[1];
-    if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
+    if (used_frames) {
+      pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames & 0xff;
+    }
     used_frames = ref_frames_used[2] | ref_frames_used[3];
-    if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
+    if (used_frames) {
+      pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames & 0xff;
+    }
     used_frames = ref_frames_used[0] | ref_frames_used[2];
-    if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
+    if (used_frames) {
+      pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames & 0xff;
+    }
     used_frames = ref_frames_used[1] | ref_frames_used[3];
-    if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
+    if (used_frames) {
+      pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames & 0xff;
+    }
   }
 
   {
diff --git a/libvpx/vp9/encoder/vp9_encoder.c b/libvpx/vp9/encoder/vp9_encoder.c
index 7f82a470b..b1a81c04a 100644
--- a/libvpx/vp9/encoder/vp9_encoder.c
+++ b/libvpx/vp9/encoder/vp9_encoder.c
@@ -80,6 +80,7 @@
 #include "vp9/encoder/vp9_speed_features.h"
 #include "vp9/encoder/vp9_svc_layercontext.h"
 #include "vp9/encoder/vp9_temporal_filter.h"
+#include "vp9/vp9_cx_iface.h"
 
 #define AM_SEGMENT_ID_INACTIVE 7
 #define AM_SEGMENT_ID_ACTIVE 0
@@ -459,8 +460,8 @@ static int compute_context_model_diff(const VP9_COMMON *const cm) {
 #endif  // !CONFIG_REALTIME_ONLY
 
 // Test for whether to calculate metrics for the frame.
-static int is_psnr_calc_enabled(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
+static int is_psnr_calc_enabled(const VP9_COMP *cpi) {
+  const VP9_COMMON *const cm = &cpi->common;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 
   return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
@@ -822,8 +823,28 @@ static void setup_frame(VP9_COMP *cpi) {
   // layer ARF case as well.
   if (cpi->multi_layer_arf && !cpi->use_svc) {
     GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-    cm->frame_context_idx = clamp(gf_group->layer_depth[gf_group->index] - 1, 0,
-                                  FRAME_CONTEXTS - 1);
+    const int gf_group_index = gf_group->index;
+    const int boost_frame =
+        !cpi->rc.is_src_frame_alt_ref &&
+        (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
+
+    // frame_context_idx           Frame Type
+    //        0              Intra only frame, base layer ARF
+    //        1              ARFs with layer depth = 2,3
+    //        2              ARFs with layer depth > 3
+    //        3              Non-boosted frames
+    if (frame_is_intra_only(cm)) {
+      cm->frame_context_idx = 0;
+    } else if (boost_frame) {
+      if (gf_group->rf_level[gf_group_index] == GF_ARF_STD)
+        cm->frame_context_idx = 0;
+      else if (gf_group->layer_depth[gf_group_index] <= 3)
+        cm->frame_context_idx = 1;
+      else
+        cm->frame_context_idx = 2;
+    } else {
+      cm->frame_context_idx = 3;
+    }
   }
 
   if (cm->frame_type == KEY_FRAME) {
@@ -1436,7 +1457,6 @@ static void init_level_constraint(LevelConstraint *lc) {
   lc->level_index = -1;
   lc->max_cpb_size = INT_MAX;
   lc->max_frame_size = INT_MAX;
-  lc->rc_config_updated = 0;
   lc->fail_flag = 0;
 }
 
@@ -1448,7 +1468,7 @@ static void set_level_constraint(LevelConstraint *ls, int8_t level_index) {
   }
 }
 
-static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
+static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
   VP9_COMMON *const cm = &cpi->common;
 
   cpi->oxcf = *oxcf;
@@ -1513,13 +1533,15 @@ static void set_rc_buffer_sizes(RATE_CONTROL *rc,
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
+// TODO(angiebird): make sdx8f available for highbitdepth if needed
 #define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
   cpi->fn_ptr[BT].sdf = SDF;                             \
   cpi->fn_ptr[BT].sdaf = SDAF;                           \
   cpi->fn_ptr[BT].vf = VF;                               \
   cpi->fn_ptr[BT].svf = SVF;                             \
   cpi->fn_ptr[BT].svaf = SVAF;                           \
-  cpi->fn_ptr[BT].sdx4df = SDX4DF;
+  cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
+  cpi->fn_ptr[BT].sdx8f = NULL;
 
 #define MAKE_BFP_SAD_WRAPPER(fnname)                                           \
   static unsigned int fnname##_bits8(const uint8_t *src_ptr,                   \
@@ -2137,7 +2159,112 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
   } while (++i <= MV_MAX);
 }
 
-VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
+static void init_ref_frame_bufs(VP9_COMMON *cm) {
+  int i;
+  BufferPool *const pool = cm->buffer_pool;
+  cm->new_fb_idx = INVALID_IDX;
+  for (i = 0; i < REF_FRAMES; ++i) {
+    cm->ref_frame_map[i] = INVALID_IDX;
+  }
+  for (i = 0; i < FRAME_BUFFERS; ++i) {
+    pool->frame_bufs[i].ref_count = 0;
+  }
+}
+
+static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth,
+                                 int subsampling_x, int subsampling_y) {
+  VP9_COMMON *const cm = &cpi->common;
+#if !CONFIG_VP9_HIGHBITDEPTH
+  (void)use_highbitdepth;
+  assert(use_highbitdepth == 0);
+#endif
+
+  if (!cpi->initial_width ||
+#if CONFIG_VP9_HIGHBITDEPTH
+      cm->use_highbitdepth != use_highbitdepth ||
+#endif
+      cm->subsampling_x != subsampling_x ||
+      cm->subsampling_y != subsampling_y) {
+    cm->subsampling_x = subsampling_x;
+    cm->subsampling_y = subsampling_y;
+#if CONFIG_VP9_HIGHBITDEPTH
+    cm->use_highbitdepth = use_highbitdepth;
+#endif
+    alloc_util_frame_buffers(cpi);
+    cpi->initial_width = cm->width;
+    cpi->initial_height = cm->height;
+    cpi->initial_mbs = cm->MBs;
+  }
+}
+
+// TODO(angiebird): Check whether we can move this function to vpx_image.c
+static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt,
+                                              unsigned int *subsampling_x,
+                                              unsigned int *subsampling_y) {
+  switch (fmt) {
+    case VPX_IMG_FMT_I420:
+    case VPX_IMG_FMT_YV12:
+    case VPX_IMG_FMT_I422:
+    case VPX_IMG_FMT_I42016:
+    case VPX_IMG_FMT_I42216: *subsampling_x = 1; break;
+    default: *subsampling_x = 0; break;
+  }
+
+  switch (fmt) {
+    case VPX_IMG_FMT_I420:
+    case VPX_IMG_FMT_I440:
+    case VPX_IMG_FMT_YV12:
+    case VPX_IMG_FMT_I42016:
+    case VPX_IMG_FMT_I44016: *subsampling_y = 1; break;
+    default: *subsampling_y = 0; break;
+  }
+}
+
+// TODO(angiebird): Check whether we can move this function to vpx_image.c
+static INLINE int vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) {
+  return fmt & VPX_IMG_FMT_HIGHBITDEPTH;
+}
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+static void setup_denoiser_buffer(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  if (cpi->oxcf.noise_sensitivity > 0 &&
+      !cpi->denoiser.frame_buffer_initialized) {
+    if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
+                           cpi->oxcf.noise_sensitivity, cm->width, cm->height,
+                           cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                           cm->use_highbitdepth,
+#endif
+                           VP9_ENC_BORDER_IN_PIXELS))
+      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                         "Failed to allocate denoiser");
+  }
+}
+#endif
+
+void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) {
+  const VP9EncoderConfig *oxcf = &cpi->oxcf;
+  unsigned int subsampling_x, subsampling_y;
+  const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt);
+  vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y);
+
+  update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
+#if CONFIG_VP9_TEMPORAL_DENOISING
+  setup_denoiser_buffer(cpi);
+#endif
+
+  assert(cpi->lookahead == NULL);
+  cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x,
+                                      subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                                      use_highbitdepth,
+#endif
+                                      oxcf->lag_in_frames);
+  alloc_raw_frame_buffers(cpi);
+}
+
+VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
                                 BufferPool *const pool) {
   unsigned int i;
   VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
@@ -2170,10 +2297,13 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
   cpi->resize_buffer_underflow = 0;
   cpi->use_skin_detection = 0;
   cpi->common.buffer_pool = pool;
+  init_ref_frame_bufs(cm);
 
   cpi->force_update_segmentation = 0;
 
   init_config(cpi, oxcf);
+  cpi->frame_info = vp9_get_frame_info(oxcf);
+
   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
 
   cm->current_video_frame = 0;
@@ -2341,6 +2471,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
         const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
         const int packets_in_layer = (int)last_packet_for_layer->count + 1;
         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
+          int num_frames;
           LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
 
           vpx_free(lc->rc_twopass_stats_in.buf);
@@ -2352,6 +2483,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
           lc->twopass.stats_in = lc->twopass.stats_in_start;
           lc->twopass.stats_in_end =
               lc->twopass.stats_in_start + packets_in_layer - 1;
+          // Note the last packet is cumulative first pass stats.
+          // So the number of frames is packet number minus one
+          num_frames = packets_in_layer - 1;
+          fps_init_first_pass_info(&lc->twopass.first_pass_info,
+                                   lc->rc_twopass_stats_in.buf, num_frames);
           stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
         }
       }
@@ -2367,6 +2503,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
 
       vp9_init_second_pass_spatial_svc(cpi);
     } else {
+      int num_frames;
 #if CONFIG_FP_MB_STATS
       if (cpi->use_fp_mb_stats) {
         const size_t psz = cpi->common.MBs * sizeof(uint8_t);
@@ -2383,6 +2520,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
       cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
       cpi->twopass.stats_in = cpi->twopass.stats_in_start;
       cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
+      // Note the last packet is cumulative first pass stats.
+      // So the number of frames is packet number minus one
+      num_frames = packets - 1;
+      fps_init_first_pass_info(&cpi->twopass.first_pass_info,
+                               oxcf->two_pass_stats_in.buf, num_frames);
 
       vp9_init_second_pass(cpi);
     }
@@ -2409,7 +2551,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
 
   cpi->kmeans_data_arr_alloc = 0;
 #if CONFIG_NON_GREEDY_MV
-  cpi->feature_score_loc_alloc = 0;
   cpi->tpl_ready = 0;
 #endif  // CONFIG_NON_GREEDY_MV
   for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
@@ -2418,62 +2559,67 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
   CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
   cpi->source_var_thresh = 0;
   cpi->frames_till_next_var_check = 0;
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
+  cpi->fn_ptr[BT].sdf = SDF;                             \
+  cpi->fn_ptr[BT].sdaf = SDAF;                           \
+  cpi->fn_ptr[BT].vf = VF;                               \
+  cpi->fn_ptr[BT].svf = SVF;                             \
+  cpi->fn_ptr[BT].svaf = SVAF;                           \
+  cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
+  cpi->fn_ptr[BT].sdx8f = SDX8F;
 
-#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
-  cpi->fn_ptr[BT].sdf = SDF;                      \
-  cpi->fn_ptr[BT].sdaf = SDAF;                    \
-  cpi->fn_ptr[BT].vf = VF;                        \
-  cpi->fn_ptr[BT].svf = SVF;                      \
-  cpi->fn_ptr[BT].svaf = SVAF;                    \
-  cpi->fn_ptr[BT].sdx4df = SDX4DF;
-
+  // TODO(angiebird): make sdx8f available for every block size
   BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
       vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
-      vpx_sad32x16x4d)
+      vpx_sad32x16x4d, NULL)
 
   BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
       vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
-      vpx_sad16x32x4d)
+      vpx_sad16x32x4d, NULL)
 
   BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
       vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
-      vpx_sad64x32x4d)
+      vpx_sad64x32x4d, NULL)
 
   BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
       vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
-      vpx_sad32x64x4d)
+      vpx_sad32x64x4d, NULL)
 
   BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
       vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
-      vpx_sad32x32x4d)
+      vpx_sad32x32x4d, vpx_sad32x32x8)
 
   BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
       vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
-      vpx_sad64x64x4d)
+      vpx_sad64x64x4d, NULL)
 
   BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
       vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
-      vpx_sad16x16x4d)
+      vpx_sad16x16x4d, vpx_sad16x16x8)
 
   BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
       vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
-      vpx_sad16x8x4d)
+      vpx_sad16x8x4d, vpx_sad16x8x8)
 
   BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
       vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
-      vpx_sad8x16x4d)
+      vpx_sad8x16x4d, vpx_sad8x16x8)
 
   BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
-      vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d)
+      vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
+      vpx_sad8x8x8)
 
   BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
-      vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d)
+      vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
+      NULL)
 
   BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
-      vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d)
+      vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
+      NULL)
 
   BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
-      vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d)
+      vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
+      vpx_sad4x4x8)
 
 #if CONFIG_VP9_HIGHBITDEPTH
   highbd_set_var_fns(cpi);
@@ -2501,6 +2647,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
 
   cm->error.setjmp = 0;
 
+#if CONFIG_RATE_CTRL
+  encode_command_init(&cpi->encode_command);
+#endif
+
   return cpi;
 }
 
@@ -2511,9 +2661,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
   snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
 #endif  // CONFIG_INTERNAL_STATS
 
+static void free_tpl_buffer(VP9_COMP *cpi);
+
 void vp9_remove_compressor(VP9_COMP *cpi) {
   VP9_COMMON *cm;
-  unsigned int i, frame;
+  unsigned int i;
   int t;
 
   if (!cpi) return;
@@ -2586,9 +2738,16 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
           SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
         }
 
-        fprintf(f, "%s\t    Time\tRcErr\tAbsErr\n", headings);
-        fprintf(f, "%s\t%8.0f\t%7.2f\t%7.2f\n", results, total_encode_time,
-                rate_err, fabs(rate_err));
+        SNPRINT(headings, "\t    Time\tRcErr\tAbsErr");
+        SNPRINT2(results, "\t%8.0f", total_encode_time);
+        SNPRINT2(results, "\t%7.2f", rate_err);
+        SNPRINT2(results, "\t%7.2f", fabs(rate_err));
+
+        fprintf(f, "%s\tAPsnr611\n", headings);
+        fprintf(
+            f, "%s\t%7.3f\n", results,
+            (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) /
+                (cpi->count * 8));
       }
 
       fclose(f);
@@ -2618,27 +2777,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
     vpx_free(cpi->kmeans_data_arr);
   }
 
-#if CONFIG_NON_GREEDY_MV
-  vpx_free(cpi->feature_score_loc_arr);
-  vpx_free(cpi->feature_score_loc_sort);
-  vpx_free(cpi->feature_score_loc_heap);
-  vpx_free(cpi->select_mv_arr);
-#endif
-  for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
-#if CONFIG_NON_GREEDY_MV
-    int rf_idx;
-    for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
-      int sqr_bsize;
-      for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
-        vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]);
-      }
-      vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
-      vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
-    }
-#endif
-    vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
-    cpi->tpl_stats[frame].is_valid = 0;
-  }
+  free_tpl_buffer(cpi);
 
   for (t = 0; t < cpi->num_workers; ++t) {
     VPxWorker *const worker = &cpi->workers[t];
@@ -2719,30 +2858,19 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
 #endif
 }
 
-static void generate_psnr_packet(VP9_COMP *cpi) {
-  struct vpx_codec_cx_pkt pkt;
-  int i;
-  PSNR_STATS psnr;
+int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) {
+  if (is_psnr_calc_enabled(cpi)) {
 #if CONFIG_VP9_HIGHBITDEPTH
-  vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr,
-                       cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
+    vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr,
+                         cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
 #else
-  vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr);
+    vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr);
 #endif
-
-  for (i = 0; i < 4; ++i) {
-    pkt.data.psnr.samples[i] = psnr.samples[i];
-    pkt.data.psnr.sse[i] = psnr.sse[i];
-    pkt.data.psnr.psnr[i] = psnr.psnr[i];
+    return 1;
+  } else {
+    vp9_zero(*psnr);
+    return 0;
   }
-  pkt.kind = VPX_CODEC_PSNR_PKT;
-  if (cpi->use_svc)
-    cpi->svc
-        .layer_context[cpi->svc.spatial_layer_id *
-                       cpi->svc.number_temporal_layers]
-        .psnr_pkt = pkt.data.psnr;
-  else
-    vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
 }
 
 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
@@ -3572,29 +3700,12 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
           vpx_calloc(cpi->un_scaled_source->y_width,
                      sizeof(*cpi->common.postproc_state.limits));
     }
-    vp9_denoise(cpi->Source, cpi->Source, l, cpi->common.postproc_state.limits);
+    vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l,
+                cpi->common.postproc_state.limits);
   }
 #endif  // CONFIG_VP9_POSTPROC
 }
 
-#if CONFIG_VP9_TEMPORAL_DENOISING
-static void setup_denoiser_buffer(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
-  if (cpi->oxcf.noise_sensitivity > 0 &&
-      !cpi->denoiser.frame_buffer_initialized) {
-    if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
-                           cpi->oxcf.noise_sensitivity, cm->width, cm->height,
-                           cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
-                           cm->use_highbitdepth,
-#endif
-                           VP9_ENC_BORDER_IN_PIXELS))
-      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
-                         "Failed to allocate denoiser");
-  }
-}
-#endif
-
 static void init_motion_estimation(VP9_COMP *cpi) {
   int y_stride = cpi->scaled_source.y_stride;
 
@@ -4175,6 +4286,14 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
       vp9_scale_references(cpi);
     }
 
+#if CONFIG_RATE_CTRL
+    // TODO(angiebird): This is a hack for making sure the encoder use the
+    // external_quantize_index exactly. Avoid this kind of hack later.
+    if (cpi->encode_command.use_external_quantize_index) {
+      q = cpi->encode_command.external_quantize_index;
+    }
+#endif
+
     vp9_set_quantizer(cm, q);
 
     if (loop_count == 0) setup_frame(cpi);
@@ -4213,6 +4332,16 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
       if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
     }
 
+#if CONFIG_RATE_CTRL
+    // This part needs to be after save_coding_context() because
+    // restore_coding_context will be called in the end of this function.
+    // TODO(angiebird): This is a hack for making sure the encoder use the
+    // external_quantize_index exactly. Avoid this kind of hack later.
+    if (cpi->encode_command.use_external_quantize_index) {
+      break;
+    }
+#endif
+
     if (oxcf->rc_mode == VPX_Q) {
       loop = 0;
     } else {
@@ -4389,7 +4518,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
     }
 
     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF)
-      if (loop || !enable_acl) restore_coding_context(cpi);
+      if (loop) restore_coding_context(cpi);
   } while (loop);
 
 #ifdef AGGRESSIVE_VBR
@@ -4415,13 +4544,11 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
     // Skip recoding, if model diff is below threshold
     const int thresh = compute_context_model_thresh(cpi);
     const int diff = compute_context_model_diff(cm);
-    if (diff < thresh) {
-      vpx_clear_system_state();
-      restore_coding_context(cpi);
-      return;
+    if (diff >= thresh) {
+      vp9_encode_frame(cpi);
     }
-
-    vp9_encode_frame(cpi);
+  }
+  if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
     vpx_clear_system_state();
     restore_coding_context(cpi);
   }
@@ -4756,17 +4883,6 @@ static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
   }
 }
 
-// Implementation and modifications of C. Yeo, H. L. Tan, and Y. H. Tan, "On
-// rate distortion optimization using SSIM," Circuits and Systems for Video
-// Technology, IEEE Transactions on, vol. 23, no. 7, pp. 1170-1181, 2013.
-// SSIM_VAR_SCALE defines the strength of the bias towards SSIM in RDO.
-// Some sample values are:
-// (for midres test set)
-// SSIM_VAR_SCALE  avg_psnr   ssim   ms_ssim
-//      8.0          9.421   -5.537  -6.898
-//     16.0          4.703   -5.378  -6.238
-//     32.0          1.929   -4.308  -4.807
-#define SSIM_VAR_SCALE 16.0
 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
   VP9_COMMON *cm = &cpi->common;
   ThreadData *td = &cpi->td;
@@ -4783,19 +4899,6 @@ static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
   double log_sum = 0.0;
   int row, col;
 
-#if CONFIG_VP9_HIGHBITDEPTH
-  double c2;
-  if (xd->bd == 10) {
-    c2 = 941.8761;  // (.03*1023)^2
-  } else if (xd->bd == 12) {
-    c2 = 15092.1225;  // (.03*4095)^2
-  } else {
-    c2 = 58.5225;  // (.03*255)^2
-  }
-#else
-  const double c2 = 58.5225;  // (.03*255)^2
-#endif
-
   // Loop through each 64x64 block.
   for (row = 0; row < num_rows; ++row) {
     for (col = 0; col < num_cols; ++col) {
@@ -4817,19 +4920,22 @@ static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
           // In order to make SSIM_VAR_SCALE in a same scale for both 8 bit
           // and high bit videos, the variance needs to be divided by 2.0 or
           // 64.0 separately.
+          // TODO(sdeng): need to tune for 12bit videos.
 #if CONFIG_VP9_HIGHBITDEPTH
           if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
-            var +=
-                vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd) / 2.0;
+            var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
           else
 #endif
-            var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8) / 64.0;
+            var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);
 
           num_of_var += 1.0;
         }
       }
-      var = var / num_of_var / SSIM_VAR_SCALE;
-      var = 2.0 * var + c2;
+      var = var / num_of_var / 64.0;
+
+      // Curve fitting with an exponential model on all 16x16 blocks from the
+      // Midres dataset.
+      var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
       cpi->mi_ssim_rdmult_scaling_factors[index] = var;
       log_sum += log(var);
     }
@@ -4976,12 +5082,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
   TX_SIZE t;
 
   // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
-  // If in constrained layer drop mode (svc.framedrop_mode != LAYER_DROP) and
-  // base spatial layer was dropped, no need to set svc.skip_enhancement_layer,
-  // as whole superframe will be dropped.
+  // No need to set svc.skip_enhancement_layer if whole superframe will be
+  // dropped.
   if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
       cpi->oxcf.target_bandwidth == 0 &&
       !(cpi->svc.framedrop_mode != LAYER_DROP &&
+        (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP ||
+         cpi->svc
+             .force_drop_constrained_from_above[cpi->svc.number_spatial_layers -
+                                                1]) &&
         cpi->svc.drop_spatial_layer[0])) {
     cpi->svc.skip_enhancement_layer = 1;
     vp9_rc_postencode_update_drop_frame(cpi);
@@ -4989,17 +5098,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
     cpi->last_frame_dropped = 1;
     cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
     cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
-    if (cpi->svc.framedrop_mode == LAYER_DROP ||
-        cpi->svc.drop_spatial_layer[0] == 0) {
-      // For the case of constrained drop mode where the base is dropped
-      // (drop_spatial_layer[0] == 1), which means full superframe dropped,
-      // we don't increment the svc frame counters. In particular temporal
-      // layer counter (which is incremented in vp9_inc_frame_in_layer())
-      // won't be incremented, so on a dropped frame we try the same
-      // temporal_layer_id on next incoming frame. This is to avoid an
-      // issue with temporal alignement with full superframe dropping.
-      vp9_inc_frame_in_layer(cpi);
-    }
+    vp9_inc_frame_in_layer(cpi);
     return;
   }
 
@@ -5285,54 +5384,9 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
   mismatch_move_frame_idx_w();
 #endif
   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
-
-  vp9_twopass_postencode_update(cpi);
 }
 #endif  // !CONFIG_REALTIME_ONLY
 
-static void init_ref_frame_bufs(VP9_COMMON *cm) {
-  int i;
-  BufferPool *const pool = cm->buffer_pool;
-  cm->new_fb_idx = INVALID_IDX;
-  for (i = 0; i < REF_FRAMES; ++i) {
-    cm->ref_frame_map[i] = INVALID_IDX;
-  }
-  for (i = 0; i < FRAME_BUFFERS; ++i) {
-    pool->frame_bufs[i].ref_count = 0;
-  }
-}
-
-static void check_initial_width(VP9_COMP *cpi,
-#if CONFIG_VP9_HIGHBITDEPTH
-                                int use_highbitdepth,
-#endif
-                                int subsampling_x, int subsampling_y) {
-  VP9_COMMON *const cm = &cpi->common;
-
-  if (!cpi->initial_width ||
-#if CONFIG_VP9_HIGHBITDEPTH
-      cm->use_highbitdepth != use_highbitdepth ||
-#endif
-      cm->subsampling_x != subsampling_x ||
-      cm->subsampling_y != subsampling_y) {
-    cm->subsampling_x = subsampling_x;
-    cm->subsampling_y = subsampling_y;
-#if CONFIG_VP9_HIGHBITDEPTH
-    cm->use_highbitdepth = use_highbitdepth;
-#endif
-
-    alloc_raw_frame_buffers(cpi);
-    init_ref_frame_bufs(cm);
-    alloc_util_frame_buffers(cpi);
-
-    init_motion_estimation(cpi);  // TODO(agrange) This can be removed.
-
-    cpi->initial_width = cm->width;
-    cpi->initial_height = cm->height;
-    cpi->initial_mbs = cm->MBs;
-  }
-}
-
 int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
                           int64_t end_time) {
@@ -5343,30 +5397,21 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
   const int subsampling_y = sd->subsampling_y;
 #if CONFIG_VP9_HIGHBITDEPTH
   const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
-#endif
-
-#if CONFIG_VP9_HIGHBITDEPTH
-  check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
 #else
-  check_initial_width(cpi, subsampling_x, subsampling_y);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-#if CONFIG_VP9_HIGHBITDEPTH
-  // Disable denoiser for high bitdepth since vp9_denoiser_filter only works for
-  // 8 bits.
-  if (cm->bit_depth > 8) cpi->oxcf.noise_sensitivity = 0;
+  const int use_highbitdepth = 0;
 #endif
 
+  update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
 #if CONFIG_VP9_TEMPORAL_DENOISING
   setup_denoiser_buffer(cpi);
 #endif
+
+  alloc_raw_frame_buffers(cpi);
+
   vpx_usec_timer_start(&timer);
 
   if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
-#if CONFIG_VP9_HIGHBITDEPTH
-                         use_highbitdepth,
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-                         frame_flags))
+                         use_highbitdepth, frame_flags))
     res = -1;
   vpx_usec_timer_mark(&timer);
   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
@@ -5867,18 +5912,89 @@ static void init_tpl_stats(VP9_COMP *cpi) {
 }
 
 #if CONFIG_NON_GREEDY_MV
-static uint32_t motion_compensated_prediction(
-    VP9_COMP *cpi, ThreadData *td, int frame_idx, uint8_t *cur_frame_buf,
-    uint8_t *ref_frame_buf, int stride, BLOCK_SIZE bsize, int mi_row,
-    int mi_col, MV *mv, int rf_idx) {
-#else   // CONFIG_NON_GREEDY_MV
+static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
+                                         MotionField *motion_field,
+                                         int frame_idx, uint8_t *cur_frame_buf,
+                                         uint8_t *ref_frame_buf, int stride,
+                                         BLOCK_SIZE bsize, int mi_row,
+                                         int mi_col, MV *mv) {
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+  int step_param;
+  uint32_t bestsme = UINT_MAX;
+  const MvLimits tmp_mv_limits = x->mv_limits;
+  // lambda is used to adjust the importance of motion vector consitency.
+  // TODO(angiebird): Figure out lambda's proper value.
+  const int lambda = cpi->tpl_stats[frame_idx].lambda;
+  int_mv nb_full_mvs[NB_MVS_NUM];
+  int nb_full_mv_num;
+
+  MV best_ref_mv1 = { 0, 0 };
+  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
+
+  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
+  best_ref_mv1_full.row = best_ref_mv1.row >> 3;
+
+  // Setup frame pointers
+  x->plane[0].src.buf = cur_frame_buf;
+  x->plane[0].src.stride = stride;
+  xd->plane[0].pre[0].buf = ref_frame_buf;
+  xd->plane[0].pre[0].stride = stride;
+
+  step_param = mv_sf->reduce_first_step_size;
+  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
+
+  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
+
+  nb_full_mv_num =
+      vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
+  vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
+                             lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
+
+  /* restore UMV window */
+  x->mv_limits = tmp_mv_limits;
+
+  return bestsme;
+}
+
+static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
+                                        uint8_t *cur_frame_buf,
+                                        uint8_t *ref_frame_buf, int stride,
+                                        BLOCK_SIZE bsize, MV *mv) {
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+  uint32_t bestsme = UINT_MAX;
+  uint32_t distortion;
+  uint32_t sse;
+  int cost_list[5];
+
+  MV best_ref_mv1 = { 0, 0 };
+
+  // Setup frame pointers
+  x->plane[0].src.buf = cur_frame_buf;
+  x->plane[0].src.stride = stride;
+  xd->plane[0].pre[0].buf = ref_frame_buf;
+  xd->plane[0].pre[0].stride = stride;
+
+  // TODO(yunqing): may use higher tap interp filter than 2 taps.
+  // Ignore mv costing by sending NULL pointer instead of cost array
+  bestsme = cpi->find_fractional_mv_step(
+      x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
+      &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
+      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
+      USE_2_TAPS);
+
+  return bestsme;
+}
+
+#else  // CONFIG_NON_GREEDY_MV
 static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
-                                              int frame_idx,
                                               uint8_t *cur_frame_buf,
                                               uint8_t *ref_frame_buf,
                                               int stride, BLOCK_SIZE bsize,
-                                              int mi_row, int mi_col, MV *mv) {
-#endif  // CONFIG_NON_GREEDY_MV
+                                              MV *mv) {
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
@@ -5890,12 +6006,6 @@ static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
   uint32_t sse;
   int cost_list[5];
   const MvLimits tmp_mv_limits = x->mv_limits;
-#if CONFIG_NON_GREEDY_MV
-  // lambda is used to adjust the importance of motion vector consitency.
-  // TODO(angiebird): Figure out lambda's proper value.
-  const int lambda = cpi->tpl_stats[frame_idx].lambda;
-  int_mv nb_full_mvs[NB_MVS_NUM];
-#endif
 
   MV best_ref_mv1 = { 0, 0 };
   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
@@ -5914,21 +6024,9 @@ static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
 
   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
 
-#if CONFIG_NON_GREEDY_MV
-  (void)search_method;
-  (void)sadpb;
-  vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx,
-                          bsize, nb_full_mvs);
-  vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1,
-                             &cpi->fn_ptr[bsize], nb_full_mvs, NB_MVS_NUM, mv);
-#else
-  (void)frame_idx;
-  (void)mi_row;
-  (void)mi_col;
   vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
                         search_method, sadpb, cond_cost_list(cpi, cost_list),
                         &best_ref_mv1, mv, 0, 0);
-#endif
 
   /* restore UMV window */
   x->mv_limits = tmp_mv_limits;
@@ -5943,6 +6041,7 @@ static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
 
   return bestsme;
 }
+#endif
 
 static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
                             int ref_pos_col, int block, BLOCK_SIZE bsize) {
@@ -6224,19 +6323,22 @@ static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
 
   set_mv_limits(cm, x, mi_row, mi_col);
 
-  for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
     int_mv mv;
+#if CONFIG_NON_GREEDY_MV
+    MotionField *motion_field;
+#endif
     if (ref_frame[rf_idx] == NULL) continue;
 
 #if CONFIG_NON_GREEDY_MV
     (void)td;
-    mv.as_int =
-        get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_int;
+    motion_field = vp9_motion_field_info_get_motion_field(
+        &cpi->motion_field_info, frame_idx, rf_idx, bsize);
+    mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
 #else
-    motion_compensated_prediction(
-        cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
-        ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
-        mi_row, mi_col, &mv.as_mv);
+    motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
+                                  ref_frame[rf_idx]->y_buffer + mb_y_offset,
+                                  xd->cur_buf->y_stride, bsize, &mv.as_mv);
 #endif
 
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -6378,8 +6480,9 @@ static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
 }
 
 static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
-                                  TplDepFrame *tpl_frame, int rf_idx,
-                                  BLOCK_SIZE bsize, int mi_row, int mi_col) {
+                                  MotionField *motion_field,
+                                  TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
+                                  int mi_row, int mi_col) {
   int_mv mv;
   switch (mv_mode) {
     case ZERO_MV_MODE:
@@ -6387,7 +6490,7 @@ static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
       mv.as_mv.col = 0;
       break;
     case NEW_MV_MODE:
-      mv = *get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col);
+      mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
       break;
     case NEAREST_MV_MODE:
       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
@@ -6404,15 +6507,16 @@ static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
 }
 
 static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
-                          GF_PICTURE *gf_picture, int frame_idx,
-                          TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize,
-                          int mi_row, int mi_col, int_mv *mv) {
+                          GF_PICTURE *gf_picture, MotionField *motion_field,
+                          int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
+                          BLOCK_SIZE bsize, int mi_row, int mi_col,
+                          int_mv *mv) {
   uint32_t sse;
   struct buf_2d src;
   struct buf_2d pre;
   MV full_mv;
-  *mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row,
-                            mi_col);
+  *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
+                            mi_row, mi_col);
   full_mv = get_full_mv(&mv->as_mv);
   if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
                              &src, &pre)) {
@@ -6449,18 +6553,18 @@ static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
   mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
   return mv_diff_cost;
 }
-static double get_mv_cost(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
-                          int rf_idx, BLOCK_SIZE bsize, int mi_row,
+static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
+                          TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
                           int mi_col) {
   double mv_cost = get_mv_mode_cost(mv_mode);
   if (mv_mode == NEW_MV_MODE) {
-    MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize,
-                                    mi_row, mi_col)
+    MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
+                                    bsize, mi_row, mi_col)
                     .as_mv;
-    MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, tpl_frame, rf_idx,
-                                        bsize, mi_row, mi_col)
+    MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
+                                        tpl_frame, bsize, mi_row, mi_col)
                         .as_mv;
-    MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, tpl_frame, rf_idx,
+    MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
                                      bsize, mi_row, mi_col)
                      .as_mv;
     double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
@@ -6471,21 +6575,24 @@ static double get_mv_cost(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
 }
 
 static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
-                           GF_PICTURE *gf_picture, int frame_idx,
-                           TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize,
-                           int mi_row, int mi_col, int_mv *mv) {
+                           GF_PICTURE *gf_picture, MotionField *motion_field,
+                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
+                           BLOCK_SIZE bsize, int mi_row, int mi_col,
+                           int_mv *mv) {
   MACROBLOCKD *xd = &x->e_mbd;
-  double mv_dist = get_mv_dist(mv_mode, cpi, xd, gf_picture, frame_idx,
-                               tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
+  double mv_dist =
+      get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
+                  tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
   double mv_cost =
-      get_mv_cost(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row, mi_col);
+      get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
   double mult = 180;
 
   return mv_cost + mult * log2f(1 + mv_dist);
 }
 
 static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
-                                 GF_PICTURE *gf_picture, int frame_idx,
+                                 GF_PICTURE *gf_picture,
+                                 MotionField *motion_field, int frame_idx,
                                  TplDepFrame *tpl_frame, int rf_idx,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  double *rd, int_mv *mv) {
@@ -6499,8 +6606,8 @@ static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
     if (mv_mode == NEW_MV_MODE) {
       continue;
     }
-    this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, frame_idx, tpl_frame,
-                           rf_idx, bsize, mi_row, mi_col, &this_mv);
+    this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
+                           tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
     if (update == 0) {
       *rd = this_rd;
       *mv = this_mv;
@@ -6518,8 +6625,8 @@ static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
 }
 
 static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
-                            GF_PICTURE *gf_picture, int frame_idx,
-                            TplDepFrame *tpl_frame, int rf_idx,
+                            GF_PICTURE *gf_picture, MotionField *motion_field,
+                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                             BLOCK_SIZE bsize, int mi_row, int mi_col) {
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
@@ -6549,9 +6656,9 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
         double this_rd;
         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
-        mv_mode_arr[nb_row * stride + nb_col] =
-            find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame,
-                                  rf_idx, bsize, nb_row, nb_col, &this_rd, mv);
+        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
+            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
+            bsize, nb_row, nb_col, &this_rd, mv);
         if (r == 0 && c == 0) {
           this_no_new_mv_rd = this_rd;
         }
@@ -6565,9 +6672,9 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
   // new mv
   mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
-  this_new_mv_rd = eval_mv_mode(NEW_MV_MODE, cpi, x, gf_picture, frame_idx,
-                                tpl_frame, rf_idx, bsize, mi_row, mi_col,
-                                &select_mv_arr[mi_row * stride + mi_col]);
+  this_new_mv_rd = eval_mv_mode(
+      NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
+      rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
   new_mv_rd = this_new_mv_rd;
   // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
   // beforehand.
@@ -6580,9 +6687,9 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
         double this_rd;
         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
-        mv_mode_arr[nb_row * stride + nb_col] =
-            find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame,
-                                  rf_idx, bsize, nb_row, nb_col, &this_rd, mv);
+        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
+            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
+            bsize, nb_row, nb_col, &this_rd, mv);
         new_mv_rd += this_rd;
       }
     }
@@ -6612,7 +6719,8 @@ static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
 }
 
 static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
-                                GF_PICTURE *gf_picture, int frame_idx,
+                                GF_PICTURE *gf_picture,
+                                MotionField *motion_field, int frame_idx,
                                 TplDepFrame *tpl_frame, int rf_idx,
                                 BLOCK_SIZE bsize) {
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -6631,160 +6739,40 @@ static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
       assert(c >= 0 && c < unit_cols);
       assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
       assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
-      predict_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, rf_idx, bsize,
-                      mi_row, mi_col);
+      predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
+                      rf_idx, bsize, mi_row, mi_col);
     }
   }
 }
 
-static double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows,
-                                int cols) {
-  double IxIx = 0;
-  double IxIy = 0;
-  double IyIy = 0;
-  double score;
-  int r, c;
-  vpx_clear_system_state();
-  for (r = 0; r + 1 < rows; ++r) {
-    for (c = 0; c + 1 < cols; ++c) {
-      int diff_x = buf[r * stride + c] - buf[r * stride + c + 1];
-      int diff_y = buf[r * stride + c] - buf[(r + 1) * stride + c];
-      IxIx += diff_x * diff_x;
-      IxIy += diff_x * diff_y;
-      IyIy += diff_y * diff_y;
-    }
-  }
-  IxIx /= (rows - 1) * (cols - 1);
-  IxIy /= (rows - 1) * (cols - 1);
-  IyIy /= (rows - 1) * (cols - 1);
-  score = (IxIx * IyIy - IxIy * IxIy + 0.0001) / (IxIx + IyIy + 0.0001);
-  return score;
-}
-
-static int compare_feature_score(const void *a, const void *b) {
-  const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a;
-  const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b;
-  if (aa->feature_score < bb->feature_score) {
-    return 1;
-  } else if (aa->feature_score > bb->feature_score) {
-    return -1;
-  } else {
-    return 0;
-  }
-}
-
-static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx,
-                             YV12_BUFFER_CONFIG **ref_frame, BLOCK_SIZE bsize,
+static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
+                             MotionField *motion_field, int frame_idx,
+                             YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
                              int mi_row, int mi_col) {
   VP9_COMMON *cm = &cpi->common;
   MACROBLOCK *x = &td->mb;
   MACROBLOCKD *xd = &x->e_mbd;
-  TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
-  TplDepStats *tpl_stats =
-      &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
   const int mb_y_offset =
       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
-  int rf_idx;
-
+  assert(ref_frame != NULL);
   set_mv_limits(cm, x, mi_row, mi_col);
-
-  for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
-    int_mv *mv = get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col);
-    if (ref_frame[rf_idx] == NULL) {
-      tpl_stats->ready[rf_idx] = 0;
-      continue;
-    } else {
-      tpl_stats->ready[rf_idx] = 1;
-    }
-    motion_compensated_prediction(
-        cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
-        ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
-        mi_row, mi_col, &mv->as_mv, rf_idx);
-  }
-}
-
-#define CHANGE_MV_SEARCH_ORDER 1
-#define USE_PQSORT 1
-
-#if CHANGE_MV_SEARCH_ORDER
-#if USE_PQSORT
-static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size,
-                         FEATURE_SCORE_LOC **output) {
-  if (*size > 0) {
-    *output = heap[0];
-    --*size;
-    if (*size > 0) {
-      int p, l, r;
-      heap[0] = heap[*size];
-      p = 0;
-      l = 2 * p + 1;
-      r = 2 * p + 2;
-      while (l < *size) {
-        FEATURE_SCORE_LOC *tmp;
-        int c = l;
-        if (r < *size && heap[r]->feature_score > heap[l]->feature_score) {
-          c = r;
-        }
-        if (heap[p]->feature_score >= heap[c]->feature_score) {
-          break;
-        }
-        tmp = heap[p];
-        heap[p] = heap[c];
-        heap[c] = tmp;
-        p = c;
-        l = 2 * p + 1;
-        r = 2 * p + 2;
-      }
-    }
-  } else {
-    assert(0);
-  }
-}
-
-static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size,
-                          FEATURE_SCORE_LOC *input) {
-  int c, p;
-  FEATURE_SCORE_LOC *tmp;
-  input->visited = 1;
-  heap[*size] = input;
-  ++*size;
-  c = *size - 1;
-  p = c >> 1;
-  while (c > 0 && heap[c]->feature_score > heap[p]->feature_score) {
-    tmp = heap[p];
-    heap[p] = heap[c];
-    heap[c] = tmp;
-    c = p;
-    p >>= 1;
-  }
-}
-
-static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame,
-                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                  int *heap_size) {
-  const int mi_unit = num_8x8_blocks_wide_lookup[bsize];
-  const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
-  int i;
-  for (i = 0; i < NB_MVS_NUM; ++i) {
-    int r = dirs[i][0] * mi_unit;
-    int c = dirs[i][1] * mi_unit;
-    if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
-        mi_col + c < tpl_frame->mi_cols) {
-      FEATURE_SCORE_LOC *fs_loc =
-          &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride +
-                                      (mi_col + c)];
-      if (fs_loc->visited == 0) {
-        max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc);
-      }
-    }
-  }
-}
-#endif  // USE_PQSORT
-#endif  // CHANGE_MV_SEARCH_ORDER
-
-static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx,
-                               YV12_BUFFER_CONFIG *ref_frame[3],
-                               BLOCK_SIZE bsize) {
+  {
+    int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
+    uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
+    uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
+    const int stride = xd->cur_buf->y_stride;
+    full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
+                             ref_frame_buf, stride, bsize, mi_row, mi_col,
+                             &mv.as_mv);
+    sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
+                            bsize, &mv.as_mv);
+    vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
+  }
+}
+
+static void build_motion_field(
+    VP9_COMP *cpi, int frame_idx,
+    YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
   VP9_COMMON *cm = &cpi->common;
   ThreadData *td = &cpi->td;
   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
@@ -6792,79 +6780,26 @@ static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx,
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
   const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
   const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
-  int fs_loc_sort_size;
-  int fs_loc_heap_size;
   int mi_row, mi_col;
+  int rf_idx;
 
   tpl_frame->lambda = (pw * ph) >> 2;
   assert(pw * ph == tpl_frame->lambda << 2);
 
-  fs_loc_sort_size = 0;
-  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
-    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
-      const int mb_y_offset =
-          mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
-      const int bw = 4 << b_width_log2_lookup[bsize];
-      const int bh = 4 << b_height_log2_lookup[bsize];
-      TplDepStats *tpl_stats =
-          &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
-      FEATURE_SCORE_LOC *fs_loc =
-          &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col];
-      tpl_stats->feature_score = get_feature_score(
-          xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh);
-      fs_loc->visited = 0;
-      fs_loc->feature_score = tpl_stats->feature_score;
-      fs_loc->mi_row = mi_row;
-      fs_loc->mi_col = mi_col;
-      cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc;
-      ++fs_loc_sort_size;
-    }
-  }
-
-  qsort(cpi->feature_score_loc_sort, fs_loc_sort_size,
-        sizeof(*cpi->feature_score_loc_sort), compare_feature_score);
-
-  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
-    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
-      int rf_idx;
-      for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
-        TplDepStats *tpl_stats =
-            &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
-        tpl_stats->ready[rf_idx] = 0;
-      }
+  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
+    MotionField *motion_field = vp9_motion_field_info_get_motion_field(
+        &cpi->motion_field_info, frame_idx, rf_idx, bsize);
+    if (ref_frame[rf_idx] == NULL) {
+      continue;
     }
-  }
-
-#if CHANGE_MV_SEARCH_ORDER
-#if !USE_PQSORT
-  for (i = 0; i < fs_loc_sort_size; ++i) {
-    FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i];
-    do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
-                     fs_loc->mi_col);
-  }
-#else   // !USE_PQSORT
-  fs_loc_heap_size = 0;
-  max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size,
-                cpi->feature_score_loc_sort[0]);
-
-  while (fs_loc_heap_size > 0) {
-    FEATURE_SCORE_LOC *fs_loc;
-    max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc);
-
-    do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
-                     fs_loc->mi_col);
-
-    add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col,
-                          &fs_loc_heap_size);
-  }
-#endif  // !USE_PQSORT
-#else   // CHANGE_MV_SEARCH_ORDER
-  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
-    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
-      do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col);
+    vp9_motion_field_reset_mvs(motion_field);
+    for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+      for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+        do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
+                         bsize, mi_row, mi_col);
+      }
     }
   }
-#endif  // CHANGE_MV_SEARCH_ORDER
 }
 #endif  // CONFIG_NON_GREEDY_MV
 
@@ -6872,7 +6807,7 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                               int frame_idx, BLOCK_SIZE bsize) {
   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
   YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
-  YV12_BUFFER_CONFIG *ref_frame[3] = { NULL, NULL, NULL };
+  YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
 
   VP9_COMMON *cm = &cpi->common;
   struct scale_factors sf;
@@ -6922,7 +6857,7 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
 
   // Prepare reference frame pointers. If any reference frame slot is
   // unavailable, the pointer will be set to Null.
-  for (idx = 0; idx < 3; ++idx) {
+  for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
     int rf_idx = gf_picture[frame_idx].ref_frame[idx];
     if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
   }
@@ -6945,13 +6880,15 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
   for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
        ++square_block_idx) {
     BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
-    build_motion_field(cpi, xd, frame_idx, ref_frame, square_bsize);
+    build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
   }
-  for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
     int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
     if (ref_frame_idx != -1) {
-      predict_mv_mode_arr(cpi, x, gf_picture, frame_idx, tpl_frame, rf_idx,
-                          bsize);
+      MotionField *motion_field = vp9_motion_field_info_get_motion_field(
+          &cpi->motion_field_info, frame_idx, rf_idx, bsize);
+      predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
+                          tpl_frame, rf_idx, bsize);
     }
   }
 #endif
@@ -7001,7 +6938,7 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
   const VP9_COMMON *cm = &cpi->common;
   int rf_idx;
   for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
-    for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
       const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
       int mi_row, mi_col;
       int ref_frame_idx;
@@ -7022,8 +6959,9 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
-              int_mv mv =
-                  *get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col);
+              int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
+                                                       frame_idx, rf_idx, bsize,
+                                                       mi_row, mi_col);
               printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
                      mv.as_mv.col);
             }
@@ -7067,26 +7005,8 @@ static void init_tpl_buffer(VP9_COMP *cpi) {
   const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
   const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
 #if CONFIG_NON_GREEDY_MV
-  int sqr_bsize;
   int rf_idx;
 
-  // TODO(angiebird): This probably needs further modifications to support
-  // frame scaling later on.
-  if (cpi->feature_score_loc_alloc == 0) {
-    // The smallest block size of motion field is 4x4, but the mi_unit is 8x8,
-    // therefore the number of units is "mi_rows * mi_cols * 4" here.
-    CHECK_MEM_ERROR(
-        cm, cpi->feature_score_loc_arr,
-        vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->feature_score_loc_arr)));
-    CHECK_MEM_ERROR(cm, cpi->feature_score_loc_sort,
-                    vpx_calloc(mi_rows * mi_cols * 4,
-                               sizeof(*cpi->feature_score_loc_sort)));
-    CHECK_MEM_ERROR(cm, cpi->feature_score_loc_heap,
-                    vpx_calloc(mi_rows * mi_cols * 4,
-                               sizeof(*cpi->feature_score_loc_heap)));
-
-    cpi->feature_score_loc_alloc = 1;
-  }
   vpx_free(cpi->select_mv_arr);
   CHECK_MEM_ERROR(
       cm, cpi->select_mv_arr,
@@ -7101,16 +7021,7 @@ static void init_tpl_buffer(VP9_COMP *cpi) {
       continue;
 
 #if CONFIG_NON_GREEDY_MV
-    for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
-      for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
-        vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]);
-        CHECK_MEM_ERROR(
-            cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],
-            vpx_calloc(
-                mi_rows * mi_cols * 4,
-                sizeof(
-                    *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize])));
-      }
+    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
       CHECK_MEM_ERROR(
           cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
@@ -7141,6 +7052,25 @@ static void init_tpl_buffer(VP9_COMP *cpi) {
   }
 }
 
+static void free_tpl_buffer(VP9_COMP *cpi) {
+  int frame;
+#if CONFIG_NON_GREEDY_MV
+  vp9_free_motion_field_info(&cpi->motion_field_info);
+  vpx_free(cpi->select_mv_arr);
+#endif
+  for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
+#if CONFIG_NON_GREEDY_MV
+    int rf_idx;
+    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
+      vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
+      vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
+    }
+#endif
+    vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
+    cpi->tpl_stats[frame].is_valid = 0;
+  }
+}
+
 static void setup_tpl_stats(VP9_COMP *cpi) {
   GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
@@ -7165,9 +7095,39 @@ static void setup_tpl_stats(VP9_COMP *cpi) {
 #endif  // CONFIG_NON_GREEDY_MV
 }
 
+static void init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
+  encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.
+}
+
+#if !CONFIG_REALTIME_ONLY
+static void update_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result,
+                                       int show_idx,
+                                       FRAME_UPDATE_TYPE update_type,
+                                       const YV12_BUFFER_CONFIG *source_frame,
+                                       const YV12_BUFFER_CONFIG *coded_frame,
+                                       int quantize_index, uint32_t bit_depth,
+                                       uint32_t input_bit_depth) {
+  PSNR_STATS psnr;
+#if CONFIG_VP9_HIGHBITDEPTH
+  vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth,
+                       input_bit_depth);
+#else
+  (void)bit_depth;
+  (void)input_bit_depth;
+  vpx_calc_psnr(source_frame, coded_frame, &psnr);
+#endif
+  encode_frame_result->psnr = psnr.psnr[0];
+  encode_frame_result->sse = psnr.sse[0];
+  encode_frame_result->show_idx = show_idx;
+  encode_frame_result->update_type = update_type;
+  encode_frame_result->quantize_index = quantize_index;
+}
+#endif  // !CONFIG_REALTIME_ONLY
+
 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
                             size_t *size, uint8_t *dest, int64_t *time_stamp,
-                            int64_t *time_end, int flush) {
+                            int64_t *time_end, int flush,
+                            ENCODE_FRAME_RESULT *encode_frame_result) {
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   VP9_COMMON *const cm = &cpi->common;
   BufferPool *const pool = cm->buffer_pool;
@@ -7179,6 +7139,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   int arf_src_index;
   const int gf_group_index = cpi->twopass.gf_group.index;
   int i;
+  init_encode_frame_result(encode_frame_result);
 
   if (is_one_pass_cbr_svc(cpi)) {
     vp9_one_pass_cbr_svc_start_layer(cpi);
@@ -7284,10 +7245,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
     if (source != NULL) {
       cm->show_frame = 1;
       cm->intra_only = 0;
-      // if the flags indicate intra frame, but if the current picture is for
-      // non-zero spatial layer, it should not be an intra picture.
+      // If the flags indicate intra frame, but if the current picture is for
+      // spatial layer above first_spatial_layer_to_encode, it should not be an
+      // intra picture.
       if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
-          cpi->svc.spatial_layer_id > 0) {
+          cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
         source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
       }
 
@@ -7313,12 +7275,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
     *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
   } else {
     *size = 0;
-#if !CONFIG_REALTIME_ONLY
-    if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
-      vp9_end_first_pass(cpi); /* get last stats packet */
-      cpi->twopass.first_pass_done = 1;
-    }
-#endif  // !CONFIG_REALTIME_ONLY
     return -1;
   }
 
@@ -7389,6 +7345,19 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
     cpi->kmeans_data_arr_alloc = 1;
   }
 
+#if CONFIG_NON_GREEDY_MV
+  {
+    const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+    const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+    Status status = vp9_alloc_motion_field_info(
+        &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols);
+    if (status == STATUS_FAILED) {
+      vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,
+                         "vp9_alloc_motion_field_info failed");
+    }
+  }
+#endif  // CONFIG_NON_GREEDY_MV
+
   if (gf_group_index == 1 &&
       cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
       cpi->sf.enable_tpl_model) {
@@ -7432,6 +7401,25 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
     vp9_first_pass(cpi, source);
   } else if (oxcf->pass == 2 && !cpi->use_svc) {
     Pass2Encode(cpi, size, dest, frame_flags);
+    // update_encode_frame_result() depends on twopass.gf_group.index and
+    // cm->new_fb_idx and cpi->Source are updated for current properly and have
+    // not been updated for the next frame yet.
+    // The update locations are as follows.
+    // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
+    // for the first frame in the gf_group and is updated for the next frame at
+    // vp9_twopass_postencode_update().
+    // 2) cpi->Source is updated at the beginging of this function, i.e.
+    // vp9_get_compressed_data()
+    // 3) cm->new_fb_idx is updated at the beginging of this function by
+    // get_free_fb(cm)
+    // TODO(angiebird): Improve the codebase to make the update of frame
+    // dependent variables more robust.
+    update_encode_frame_result(
+        encode_frame_result, source->show_idx,
+        cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
+        cpi->Source, get_frame_new_buffer(cm), vp9_get_quantizer(cpi),
+        cpi->oxcf.input_bit_depth, cm->bit_depth);
+    vp9_twopass_postencode_update(cpi);
   } else if (cpi->use_svc) {
     SvcEncode(cpi, size, dest, frame_flags);
   } else {
@@ -7464,9 +7452,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   vpx_usec_timer_mark(&cmptimer);
   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
 
-  // Should we calculate metrics for the frame.
-  if (is_psnr_calc_enabled(cpi)) generate_psnr_packet(cpi);
-
   if (cpi->keep_level_stats && oxcf->pass != 1)
     update_level_info(cpi, size, arf_src_index);
 
@@ -7703,15 +7688,15 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
                          unsigned int height) {
   VP9_COMMON *cm = &cpi->common;
 #if CONFIG_VP9_HIGHBITDEPTH
-  check_initial_width(cpi, cm->use_highbitdepth, 1, 1);
+  update_initial_width(cpi, cm->use_highbitdepth, 1, 1);
 #else
-  check_initial_width(cpi, 1, 1);
+  update_initial_width(cpi, 0, 1, 1);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 #if CONFIG_VP9_TEMPORAL_DENOISING
   setup_denoiser_buffer(cpi);
 #endif
-
+  alloc_raw_frame_buffers(cpi);
   if (width) {
     cm->width = width;
     if (cm->width > cpi->initial_width) {
@@ -7740,7 +7725,7 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
   return;
 }
 
-int vp9_get_quantizer(VP9_COMP *cpi) { return cpi->common.base_qindex; }
+int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; }
 
 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
   if (flags &
diff --git a/libvpx/vp9/encoder/vp9_encoder.h b/libvpx/vp9/encoder/vp9_encoder.h
index f157fdfc5..0a8623efb 100644
--- a/libvpx/vp9/encoder/vp9_encoder.h
+++ b/libvpx/vp9/encoder/vp9_encoder.h
@@ -20,8 +20,10 @@
 #include "vpx_dsp/ssim.h"
 #endif
 #include "vpx_dsp/variance.h"
+#include "vpx_dsp/psnr.h"
 #include "vpx_ports/system_state.h"
 #include "vpx_util/vpx_thread.h"
+#include "vpx_util/vpx_timestamp.h"
 
 #include "vp9/common/vp9_alloccommon.h"
 #include "vp9/common/vp9_ppflags.h"
@@ -152,7 +154,10 @@ typedef struct VP9EncoderConfig {
   int height;                    // height of data passed to the compressor
   unsigned int input_bit_depth;  // Input bit depth.
   double init_framerate;         // set to passed in framerate
-  int64_t target_bandwidth;      // bandwidth to be used in bits per second
+  vpx_rational_t g_timebase;  // equivalent to g_timebase in vpx_codec_enc_cfg_t
+  vpx_rational64_t g_timebase_in_ts;  // g_timebase * TICKS_PER_SEC
+
+  int64_t target_bandwidth;  // bandwidth to be used in bits per second
 
   int noise_sensitivity;  // pre processing blur: recommendation 0
   int sharpness;          // sharpening output: recommendation 0:
@@ -259,7 +264,6 @@ typedef struct VP9EncoderConfig {
   unsigned int target_level;
 
   vpx_fixed_buf_t two_pass_stats_in;
-  struct vpx_codec_pkt_list *output_pkt_list;
 
 #if CONFIG_FP_MB_STATS
   vpx_fixed_buf_t firstpass_mb_stats_in;
@@ -293,16 +297,9 @@ typedef struct TplDepStats {
 
   int ref_frame_index;
   int_mv mv;
-
-#if CONFIG_NON_GREEDY_MV
-  int ready[3];
-  int64_t sse_arr[3];
-  double feature_score;
-#endif
 } TplDepStats;
 
 #if CONFIG_NON_GREEDY_MV
-#define SQUARE_BLOCK_SIZES 4
 
 #define ZERO_MV_MODE 0
 #define NEW_MV_MODE 1
@@ -322,54 +319,11 @@ typedef struct TplDepFrame {
   int base_qindex;
 #if CONFIG_NON_GREEDY_MV
   int lambda;
-  int_mv *pyramid_mv_arr[3][SQUARE_BLOCK_SIZES];
   int *mv_mode_arr[3];
   double *rd_diff_arr[3];
 #endif
 } TplDepFrame;
 
-#if CONFIG_NON_GREEDY_MV
-static INLINE int get_square_block_idx(BLOCK_SIZE bsize) {
-  if (bsize == BLOCK_4X4) {
-    return 0;
-  }
-  if (bsize == BLOCK_8X8) {
-    return 1;
-  }
-  if (bsize == BLOCK_16X16) {
-    return 2;
-  }
-  if (bsize == BLOCK_32X32) {
-    return 3;
-  }
-  assert(0 && "ERROR: non-square block size");
-  return -1;
-}
-
-static INLINE BLOCK_SIZE square_block_idx_to_bsize(int square_block_idx) {
-  if (square_block_idx == 0) {
-    return BLOCK_4X4;
-  }
-  if (square_block_idx == 1) {
-    return BLOCK_8X8;
-  }
-  if (square_block_idx == 2) {
-    return BLOCK_16X16;
-  }
-  if (square_block_idx == 3) {
-    return BLOCK_32X32;
-  }
-  assert(0 && "ERROR: invalid square_block_idx");
-  return BLOCK_INVALID;
-}
-
-static INLINE int_mv *get_pyramid_mv(const TplDepFrame *tpl_frame, int rf_idx,
-                                     BLOCK_SIZE bsize, int mi_row, int mi_col) {
-  return &tpl_frame->pyramid_mv_arr[rf_idx][get_square_block_idx(bsize)]
-                                   [mi_row * tpl_frame->stride + mi_col];
-}
-#endif
-
 #define TPL_DEP_COST_SCALE_LOG2 4
 
 // TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
@@ -533,7 +487,6 @@ typedef enum {
 
 typedef struct {
   int8_t level_index;
-  uint8_t rc_config_updated;
   uint8_t fail_flag;
   int max_frame_size;   // in bits
   double max_cpb_size;  // in bits
@@ -555,15 +508,6 @@ typedef struct EncFrameBuf {
 
 // Maximum operating frame buffer size needed for a GOP using ARF reference.
 #define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS)
-#if CONFIG_NON_GREEDY_MV
-typedef struct FEATURE_SCORE_LOC {
-  int visited;
-  double feature_score;
-  int mi_row;
-  int mi_col;
-} FEATURE_SCORE_LOC;
-#endif
-
 #define MAX_KMEANS_GROUPS 8
 
 typedef struct KMEANS_DATA {
@@ -572,7 +516,33 @@ typedef struct KMEANS_DATA {
   int group_idx;
 } KMEANS_DATA;
 
+#if CONFIG_RATE_CTRL
+typedef struct ENCODE_COMMAND {
+  int use_external_quantize_index;
+  int external_quantize_index;
+} ENCODE_COMMAND;
+
+static INLINE void encode_command_init(ENCODE_COMMAND *encode_command) {
+  vp9_zero(*encode_command);
+  encode_command->use_external_quantize_index = 0;
+  encode_command->external_quantize_index = -1;
+}
+
+static INLINE void encode_command_set_external_quantize_index(
+    ENCODE_COMMAND *encode_command, int quantize_index) {
+  encode_command->use_external_quantize_index = 1;
+  encode_command->external_quantize_index = quantize_index;
+}
+
+static INLINE void encode_command_reset_external_quantize_index(
+    ENCODE_COMMAND *encode_command) {
+  encode_command->use_external_quantize_index = 0;
+  encode_command->external_quantize_index = -1;
+}
+#endif  // CONFIG_RATE_CTRL
+
 typedef struct VP9_COMP {
+  FRAME_INFO frame_info;
   QUANTS quants;
   ThreadData td;
   MB_MODE_INFO_EXT *mbmi_ext_base;
@@ -611,11 +581,8 @@ typedef struct VP9_COMP {
   int kmeans_count_ls[MAX_KMEANS_GROUPS];
   int kmeans_ctr_num;
 #if CONFIG_NON_GREEDY_MV
+  MotionFieldInfo motion_field_info;
   int tpl_ready;
-  int feature_score_loc_alloc;
-  FEATURE_SCORE_LOC *feature_score_loc_arr;
-  FEATURE_SCORE_LOC **feature_score_loc_sort;
-  FEATURE_SCORE_LOC **feature_score_loc_heap;
   int_mv *select_mv_arr;
 #endif
 
@@ -878,11 +845,23 @@ typedef struct VP9_COMP {
 
   int multi_layer_arf;
   vpx_roi_map_t roi;
+#if CONFIG_RATE_CTRL
+  ENCODE_COMMAND encode_command;
+#endif
 } VP9_COMP;
 
+typedef struct ENCODE_FRAME_RESULT {
+  int show_idx;
+  FRAME_UPDATE_TYPE update_type;
+  double psnr;
+  uint64_t sse;
+  int quantize_index;
+} ENCODE_FRAME_RESULT;
+
 void vp9_initialize_enc(void);
 
-struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
+void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt);
+struct VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
                                        BufferPool *const pool);
 void vp9_remove_compressor(VP9_COMP *cpi);
 
@@ -896,7 +875,8 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
 
 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
                             size_t *size, uint8_t *dest, int64_t *time_stamp,
-                            int64_t *time_end, int flush);
+                            int64_t *time_end, int flush,
+                            ENCODE_FRAME_RESULT *encode_frame_result);
 
 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
                               vp9_ppflags_t *flags);
@@ -948,7 +928,7 @@ static INLINE void stack_init(int *stack, int length) {
   for (idx = 0; idx < length; ++idx) stack[idx] = -1;
 }
 
-int vp9_get_quantizer(struct VP9_COMP *cpi);
+int vp9_get_quantizer(const VP9_COMP *cpi);
 
 static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) {
   return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
@@ -1121,6 +1101,8 @@ void vp9_new_framerate(VP9_COMP *cpi, double framerate);
 
 void vp9_set_row_mt(VP9_COMP *cpi);
 
+int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr);
+
 #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
 
 #ifdef __cplusplus
diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c
index e0acf563b..57ab583cf 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/libvpx/vp9/encoder/vp9_firstpass.c
@@ -84,14 +84,8 @@ static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) {
   return 1;
 }
 
-static void output_stats(FIRSTPASS_STATS *stats,
-                         struct vpx_codec_pkt_list *pktlist) {
-  struct vpx_codec_cx_pkt pkt;
-  pkt.kind = VPX_CODEC_STATS_PKT;
-  pkt.data.twopass_stats.buf = stats;
-  pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
-  vpx_codec_pkt_list_add(pktlist, &pkt);
-
+static void output_stats(FIRSTPASS_STATS *stats) {
+  (void)stats;
 // TEMP debug code
 #if OUTPUT_FPF
   {
@@ -220,14 +214,14 @@ static void subtract_stats(FIRSTPASS_STATS *section,
 // bars and partially discounts other 0 energy areas.
 #define MIN_ACTIVE_AREA 0.5
 #define MAX_ACTIVE_AREA 1.0
-static double calculate_active_area(const VP9_COMP *cpi,
+static double calculate_active_area(const FRAME_INFO *frame_info,
                                     const FIRSTPASS_STATS *this_frame) {
   double active_pct;
 
   active_pct =
       1.0 -
       ((this_frame->intra_skip_pct / 2) +
-       ((this_frame->inactive_zone_rows * 2) / (double)cpi->common.mb_rows));
+       ((this_frame->inactive_zone_rows * 2) / (double)frame_info->mb_rows));
   return fclamp(active_pct, MIN_ACTIVE_AREA, MAX_ACTIVE_AREA);
 }
 
@@ -260,17 +254,16 @@ static double calculate_mod_frame_score(const VP9_COMP *cpi,
   // remaining active MBs. The correction here assumes that coding
   // 0.5N blocks of complexity 2X is a little easier than coding N
   // blocks of complexity X.
-  modified_score *=
-      pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION);
+  modified_score *= pow(calculate_active_area(&cpi->frame_info, this_frame),
+                        ACT_AREA_CORRECTION);
 
   return modified_score;
 }
 
-static double calculate_norm_frame_score(const VP9_COMP *cpi,
-                                         const TWO_PASS *twopass,
-                                         const VP9EncoderConfig *oxcf,
-                                         const FIRSTPASS_STATS *this_frame,
-                                         const double av_err) {
+static double calc_norm_frame_score(const VP9EncoderConfig *oxcf,
+                                    const FRAME_INFO *frame_info,
+                                    const FIRSTPASS_STATS *this_frame,
+                                    double mean_mod_score, double av_err) {
   double modified_score =
       av_err * pow(this_frame->coded_error * this_frame->weight /
                        DOUBLE_DIVIDE_CHECK(av_err),
@@ -285,14 +278,22 @@ static double calculate_norm_frame_score(const VP9_COMP *cpi,
   // 0.5N blocks of complexity 2X is a little easier than coding N
   // blocks of complexity X.
   modified_score *=
-      pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION);
+      pow(calculate_active_area(frame_info, this_frame), ACT_AREA_CORRECTION);
 
   // Normalize to a midpoint score.
-  modified_score /= DOUBLE_DIVIDE_CHECK(twopass->mean_mod_score);
-
+  modified_score /= DOUBLE_DIVIDE_CHECK(mean_mod_score);
   return fclamp(modified_score, min_score, max_score);
 }
 
+static double calculate_norm_frame_score(const VP9_COMP *cpi,
+                                         const TWO_PASS *twopass,
+                                         const VP9EncoderConfig *oxcf,
+                                         const FIRSTPASS_STATS *this_frame,
+                                         const double av_err) {
+  return calc_norm_frame_score(oxcf, &cpi->frame_info, this_frame,
+                               twopass->mean_mod_score, av_err);
+}
+
 // This function returns the maximum target rate per frame.
 static int frame_max_bits(const RATE_CONTROL *rc,
                           const VP9EncoderConfig *oxcf) {
@@ -312,7 +313,8 @@ void vp9_init_first_pass(VP9_COMP *cpi) {
 }
 
 void vp9_end_first_pass(VP9_COMP *cpi) {
-  output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
+  output_stats(&cpi->twopass.total_stats);
+  cpi->twopass.first_pass_done = 1;
   vpx_free(cpi->twopass.fp_mb_float_stats);
   cpi->twopass.fp_mb_float_stats = NULL;
 }
@@ -1421,7 +1423,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
 
     // Don't want to do output stats with a stack variable!
     twopass->this_frame_stats = fps;
-    output_stats(&twopass->this_frame_stats, cpi->output_pkt_list);
+    output_stats(&twopass->this_frame_stats);
     accumulate_stats(&twopass->total_stats, &fps);
 
 #if CONFIG_FP_MB_STATS
@@ -1747,15 +1749,16 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
 #define LOW_CODED_ERR_PER_MB 10.0
 #define NCOUNT_FRAME_II_THRESH 6.0
 
-static double get_sr_decay_rate(const VP9_COMP *cpi,
+static double get_sr_decay_rate(const FRAME_INFO *frame_info,
                                 const FIRSTPASS_STATS *frame) {
   double sr_diff = (frame->sr_coded_error - frame->coded_error);
   double sr_decay = 1.0;
   double modified_pct_inter;
   double modified_pcnt_intra;
   const double motion_amplitude_part =
-      frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) /
-                            (cpi->initial_height + cpi->initial_width));
+      frame->pcnt_motion *
+      ((frame->mvc_abs + frame->mvr_abs) /
+       (frame_info->frame_height + frame_info->frame_width));
 
   modified_pct_inter = frame->pcnt_inter;
   if ((frame->coded_error > LOW_CODED_ERR_PER_MB) &&
@@ -1776,74 +1779,73 @@ static double get_sr_decay_rate(const VP9_COMP *cpi,
 
 // This function gives an estimate of how badly we believe the prediction
 // quality is decaying from frame to frame.
-static double get_zero_motion_factor(const VP9_COMP *cpi,
-                                     const FIRSTPASS_STATS *frame) {
-  const double zero_motion_pct = frame->pcnt_inter - frame->pcnt_motion;
-  double sr_decay = get_sr_decay_rate(cpi, frame);
+static double get_zero_motion_factor(const FRAME_INFO *frame_info,
+                                     const FIRSTPASS_STATS *frame_stats) {
+  const double zero_motion_pct =
+      frame_stats->pcnt_inter - frame_stats->pcnt_motion;
+  double sr_decay = get_sr_decay_rate(frame_info, frame_stats);
   return VPXMIN(sr_decay, zero_motion_pct);
 }
 
 #define ZM_POWER_FACTOR 0.75
 
-static double get_prediction_decay_rate(const VP9_COMP *cpi,
-                                        const FIRSTPASS_STATS *next_frame) {
-  const double sr_decay_rate = get_sr_decay_rate(cpi, next_frame);
+static double get_prediction_decay_rate(const FRAME_INFO *frame_info,
+                                        const FIRSTPASS_STATS *frame_stats) {
+  const double sr_decay_rate = get_sr_decay_rate(frame_info, frame_stats);
   const double zero_motion_factor =
-      (0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion),
+      (0.95 * pow((frame_stats->pcnt_inter - frame_stats->pcnt_motion),
                   ZM_POWER_FACTOR));
 
   return VPXMAX(zero_motion_factor,
                 (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));
 }
 
+static int get_show_idx(const TWO_PASS *twopass) {
+  return (int)(twopass->stats_in - twopass->stats_in_start);
+}
 // Function to test for a condition where a complex transition is followed
 // by a static section. For example in slide shows where there is a fade
 // between slides. This is to help with more optimal kf and gf positioning.
-static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval,
-                                      int still_interval,
-                                      double loop_decay_rate,
-                                      double last_decay_rate) {
-  TWO_PASS *const twopass = &cpi->twopass;
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  // Break clause to detect very still sections after motion
-  // For example a static image after a fade or other transition
-  // instead of a clean scene cut.
-  if (frame_interval > rc->min_gf_interval && loop_decay_rate >= 0.999 &&
-      last_decay_rate < 0.9) {
-    int j;
-
-    // Look ahead a few frames to see if static condition persists...
-    for (j = 0; j < still_interval; ++j) {
-      const FIRSTPASS_STATS *stats = &twopass->stats_in[j];
-      if (stats >= twopass->stats_in_end) break;
-
-      if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break;
-    }
+static int check_transition_to_still(const FIRST_PASS_INFO *first_pass_info,
+                                     int show_idx, int still_interval) {
+  int j;
+  int num_frames = fps_get_num_frames(first_pass_info);
+  if (show_idx + still_interval > num_frames) {
+    return 0;
+  }
 
-    // Only if it does do we signal a transition to still.
-    return j == still_interval;
+  // Look ahead a few frames to see if static condition persists...
+  for (j = 0; j < still_interval; ++j) {
+    const FIRSTPASS_STATS *stats =
+        fps_get_frame_stats(first_pass_info, show_idx + j);
+    if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break;
   }
 
-  return 0;
+  // Only if it does do we signal a transition to still.
+  return j == still_interval;
 }
 
 // This function detects a flash through the high relative pcnt_second_ref
 // score in the frame following a flash frame. The offset passed in should
 // reflect this.
-static int detect_flash(const TWO_PASS *twopass, int offset) {
-  const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset);
-
+static int detect_flash_from_frame_stats(const FIRSTPASS_STATS *frame_stats) {
   // What we are looking for here is a situation where there is a
   // brief break in prediction (such as a flash) but subsequent frames
   // are reasonably well predicted by an earlier (pre flash) frame.
   // The recovery after a flash is indicated by a high pcnt_second_ref
   // useage or a second ref coded error notabley lower than the last
   // frame coded error.
-  return next_frame != NULL &&
-         ((next_frame->sr_coded_error < next_frame->coded_error) ||
-          ((next_frame->pcnt_second_ref > next_frame->pcnt_inter) &&
-           (next_frame->pcnt_second_ref >= 0.5)));
+  if (frame_stats == NULL) {
+    return 0;
+  }
+  return (frame_stats->sr_coded_error < frame_stats->coded_error) ||
+         ((frame_stats->pcnt_second_ref > frame_stats->pcnt_inter) &&
+          (frame_stats->pcnt_second_ref >= 0.5));
+}
+
+static int detect_flash(const TWO_PASS *twopass, int offset) {
+  const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset);
+  return detect_flash_from_frame_stats(next_frame);
 }
 
 // Update the motion related elements to the GF arf boost calculation.
@@ -1876,13 +1878,15 @@ static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
 
 #define BASELINE_ERR_PER_MB 12500.0
 #define GF_MAX_BOOST 96.0
-static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame,
+static double calc_frame_boost(const FRAME_INFO *frame_info,
+                               const FIRSTPASS_STATS *this_frame,
+                               int avg_frame_qindex,
                                double this_frame_mv_in_out) {
   double frame_boost;
-  const double lq = vp9_convert_qindex_to_q(
-      cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth);
+  const double lq =
+      vp9_convert_qindex_to_q(avg_frame_qindex, frame_info->bit_depth);
   const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5);
-  const double active_area = calculate_active_area(cpi, this_frame);
+  const double active_area = calculate_active_area(frame_info, this_frame);
 
   // Underlying boost factor is based on inter error ratio.
   frame_boost = (BASELINE_ERR_PER_MB * active_area) /
@@ -1921,7 +1925,8 @@ static double calc_kf_frame_boost(VP9_COMP *cpi,
   const double lq = vp9_convert_qindex_to_q(
       cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth);
   const double boost_q_correction = VPXMIN((0.50 + (lq * 0.015)), 2.00);
-  const double active_area = calculate_active_area(cpi, this_frame);
+  const double active_area =
+      calculate_active_area(&cpi->frame_info, this_frame);
 
   // Underlying boost factor is based on inter error ratio.
   frame_boost = (kf_err_per_mb(cpi) * active_area) /
@@ -1946,8 +1951,10 @@ static double calc_kf_frame_boost(VP9_COMP *cpi,
   return VPXMIN(frame_boost, max_boost * boost_q_correction);
 }
 
-static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
-  TWO_PASS *const twopass = &cpi->twopass;
+static int compute_arf_boost(const FRAME_INFO *frame_info,
+                             const FIRST_PASS_INFO *first_pass_info,
+                             int arf_show_idx, int f_frames, int b_frames,
+                             int avg_frame_qindex) {
   int i;
   double boost_score = 0.0;
   double mv_ratio_accumulator = 0.0;
@@ -1960,7 +1967,10 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
 
   // Search forward from the proposed arf/next gf position.
   for (i = 0; i < f_frames; ++i) {
-    const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i);
+    const FIRSTPASS_STATS *this_frame =
+        fps_get_frame_stats(first_pass_info, arf_show_idx + i);
+    const FIRSTPASS_STATS *next_frame =
+        fps_get_frame_stats(first_pass_info, arf_show_idx + i + 1);
     if (this_frame == NULL) break;
 
     // Update the motion related elements to the boost calculation.
@@ -1970,17 +1980,19 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
 
     // We want to discount the flash frame itself and the recovery
     // frame that follows as both will have poor scores.
-    flash_detected = detect_flash(twopass, i) || detect_flash(twopass, i + 1);
+    flash_detected = detect_flash_from_frame_stats(this_frame) ||
+                     detect_flash_from_frame_stats(next_frame);
 
     // Accumulate the effect of prediction quality decay.
     if (!flash_detected) {
-      decay_accumulator *= get_prediction_decay_rate(cpi, this_frame);
+      decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame);
       decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                               ? MIN_DECAY_FACTOR
                               : decay_accumulator;
     }
-    boost_score += decay_accumulator *
-                   calc_frame_boost(cpi, this_frame, this_frame_mv_in_out);
+    boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame,
+                                                        avg_frame_qindex,
+                                                        this_frame_mv_in_out);
   }
 
   arf_boost = (int)boost_score;
@@ -1995,7 +2007,10 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
 
   // Search backward towards last gf position.
   for (i = -1; i >= -b_frames; --i) {
-    const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i);
+    const FIRSTPASS_STATS *this_frame =
+        fps_get_frame_stats(first_pass_info, arf_show_idx + i);
+    const FIRSTPASS_STATS *next_frame =
+        fps_get_frame_stats(first_pass_info, arf_show_idx + i + 1);
     if (this_frame == NULL) break;
 
     // Update the motion related elements to the boost calculation.
@@ -2005,17 +2020,19 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
 
     // We want to discount the the flash frame itself and the recovery
     // frame that follows as both will have poor scores.
-    flash_detected = detect_flash(twopass, i) || detect_flash(twopass, i + 1);
+    flash_detected = detect_flash_from_frame_stats(this_frame) ||
+                     detect_flash_from_frame_stats(next_frame);
 
     // Cumulative effect of prediction quality decay.
     if (!flash_detected) {
-      decay_accumulator *= get_prediction_decay_rate(cpi, this_frame);
+      decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame);
       decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                               ? MIN_DECAY_FACTOR
                               : decay_accumulator;
     }
-    boost_score += decay_accumulator *
-                   calc_frame_boost(cpi, this_frame, this_frame_mv_in_out);
+    boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame,
+                                                        avg_frame_qindex,
+                                                        this_frame_mv_in_out);
   }
   arf_boost += (int)boost_score;
 
@@ -2026,6 +2043,15 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
   return arf_boost;
 }
 
+static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
+  const FRAME_INFO *frame_info = &cpi->frame_info;
+  TWO_PASS *const twopass = &cpi->twopass;
+  const int avg_inter_frame_qindex = cpi->rc.avg_frame_qindex[INTER_FRAME];
+  int arf_show_idx = get_show_idx(twopass);
+  return compute_arf_boost(frame_info, &twopass->first_pass_info, arf_show_idx,
+                           f_frames, b_frames, avg_inter_frame_qindex);
+}
+
 // Calculate a section intra ratio used in setting max loop filter.
 static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin,
                                          const FIRSTPASS_STATS *end,
@@ -2060,8 +2086,19 @@ static int64_t calculate_total_gf_group_bits(VP9_COMP *cpi,
 
   // Calculate the bits to be allocated to the group as a whole.
   if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0.0)) {
+    int key_frame_interval = rc->frames_since_key + rc->frames_to_key;
+    int distance_from_next_key_frame =
+        rc->frames_to_key -
+        (rc->baseline_gf_interval + rc->source_alt_ref_pending);
+    int max_gf_bits_bias = rc->avg_frame_bandwidth;
+    double gf_interval_bias_bits_normalize_factor =
+        (double)rc->baseline_gf_interval / 16;
     total_group_bits = (int64_t)(twopass->kf_group_bits *
                                  (gf_group_err / twopass->kf_group_error_left));
+    // TODO(ravi): Experiment with different values of max_gf_bits_bias
+    total_group_bits +=
+        (int64_t)((double)distance_from_next_key_frame / key_frame_interval *
+                  max_gf_bits_bias * gf_interval_bias_bits_normalize_factor);
   } else {
     total_group_bits = 0;
   }
@@ -2415,194 +2452,94 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise,
 #define ARF_ABS_ZOOM_THRESH 4.0
 
 #define MAX_GF_BOOST 5400
-static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
-  VP9_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-  VP9EncoderConfig *const oxcf = &cpi->oxcf;
-  TWO_PASS *const twopass = &cpi->twopass;
-  FIRSTPASS_STATS next_frame;
-  const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
-  int i;
 
-  double gf_group_err = 0.0;
-  double gf_group_raw_error = 0.0;
-  double gf_group_noise = 0.0;
-  double gf_group_skip_pct = 0.0;
-  double gf_group_inactive_zone_rows = 0.0;
-  double gf_group_inter = 0.0;
-  double gf_group_motion = 0.0;
-  double gf_first_frame_err = 0.0;
-  double mod_frame_err = 0.0;
+typedef struct RANGE {
+  int min;
+  int max;
+} RANGE;
 
-  double mv_ratio_accumulator = 0.0;
-  double zero_motion_accumulator = 1.0;
+static int get_gop_coding_frame_num(
+    int *use_alt_ref, const FRAME_INFO *frame_info,
+    const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc,
+    int gf_start_show_idx, const RANGE *active_gf_interval,
+    double gop_intra_factor, int lag_in_frames) {
   double loop_decay_rate = 1.00;
-  double last_loop_decay_rate = 1.00;
-
+  double mv_ratio_accumulator = 0.0;
   double this_frame_mv_in_out = 0.0;
   double mv_in_out_accumulator = 0.0;
   double abs_mv_in_out_accumulator = 0.0;
-  double mv_ratio_accumulator_thresh;
-  double abs_mv_in_out_thresh;
   double sr_accumulator = 0.0;
-  const double av_err = get_distribution_av_err(cpi, twopass);
-  unsigned int allow_alt_ref = is_altref_enabled(cpi);
-
-  int flash_detected;
-  int active_max_gf_interval;
-  int active_min_gf_interval;
-  int64_t gf_group_bits;
-  int gf_arf_bits;
-  const int is_key_frame = frame_is_intra_only(cm);
-  const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
-  int is_alt_ref_flash = 0;
-
-  double gop_intra_factor = 1.0;
-  int gop_frames;
-
-  // Reset the GF group data structures unless this is a key
-  // frame in which case it will already have been done.
-  if (is_key_frame == 0) {
-    vp9_zero(twopass->gf_group);
-  }
-
-  vpx_clear_system_state();
-  vp9_zero(next_frame);
-
-  // Load stats for the current frame.
-  mod_frame_err =
-      calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
-
-  // Note the error of the frame at the start of the group. This will be
-  // the GF frame error if we code a normal gf.
-  gf_first_frame_err = mod_frame_err;
-
-  // If this is a key frame or the overlay from a previous arf then
-  // the error score / cost of this frame has already been accounted for.
-  if (arf_active_or_kf) {
-    gf_group_err -= gf_first_frame_err;
-    gf_group_raw_error -= this_frame->coded_error;
-    gf_group_noise -= this_frame->frame_noise_energy;
-    gf_group_skip_pct -= this_frame->intra_skip_pct;
-    gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
-    gf_group_inter -= this_frame->pcnt_inter;
-    gf_group_motion -= this_frame->pcnt_motion;
-  }
-
   // Motion breakout threshold for loop below depends on image size.
-  mv_ratio_accumulator_thresh =
-      (cpi->initial_height + cpi->initial_width) / 4.0;
-  abs_mv_in_out_thresh = ARF_ABS_ZOOM_THRESH;
-
-  // Set a maximum and minimum interval for the GF group.
-  // If the image appears almost completely static we can extend beyond this.
-  {
-    int int_max_q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
-                                                  cpi->common.bit_depth));
-    int q_term = (cm->current_video_frame == 0)
-                     ? int_max_q / 32
-                     : (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex,
-                                                     cpi->common.bit_depth) /
-                             6);
-    active_min_gf_interval =
-        rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200);
-    active_min_gf_interval =
-        VPXMIN(active_min_gf_interval, rc->max_gf_interval + arf_active_or_kf);
-
-    // The value chosen depends on the active Q range. At low Q we have
-    // bits to spare and are better with a smaller interval and smaller boost.
-    // At high Q when there are few bits to spare we are better with a longer
-    // interval to spread the cost of the GF.
-    active_max_gf_interval = 11 + arf_active_or_kf + VPXMIN(5, q_term);
-
-    // Force max GF interval to be odd.
-    active_max_gf_interval = active_max_gf_interval | 0x01;
-
-    // We have: active_min_gf_interval <=
-    // rc->max_gf_interval + arf_active_or_kf.
-    if (active_max_gf_interval < active_min_gf_interval) {
-      active_max_gf_interval = active_min_gf_interval;
-    } else {
-      active_max_gf_interval = VPXMIN(active_max_gf_interval,
-                                      rc->max_gf_interval + arf_active_or_kf);
+  double mv_ratio_accumulator_thresh =
+      (frame_info->frame_height + frame_info->frame_width) / 4.0;
+  double zero_motion_accumulator = 1.0;
+  int gop_coding_frames;
+
+  *use_alt_ref = 1;
+  gop_coding_frames = 0;
+  while (gop_coding_frames < rc->static_scene_max_gf_interval &&
+         gop_coding_frames < rc->frames_to_key) {
+    const FIRSTPASS_STATS *next_next_frame;
+    const FIRSTPASS_STATS *next_frame;
+    int flash_detected;
+    ++gop_coding_frames;
+
+    next_frame = fps_get_frame_stats(first_pass_info,
+                                     gf_start_show_idx + gop_coding_frames);
+    if (next_frame == NULL) {
+      break;
     }
 
-    // Would the active max drop us out just before the near the next kf?
-    if ((active_max_gf_interval <= rc->frames_to_key) &&
-        (active_max_gf_interval >= (rc->frames_to_key - rc->min_gf_interval)))
-      active_max_gf_interval = rc->frames_to_key / 2;
-  }
-  active_max_gf_interval =
-      VPXMAX(active_max_gf_interval, active_min_gf_interval);
-
-  if (cpi->multi_layer_arf) {
-    int layers = 0;
-    int max_layers = VPXMIN(MAX_ARF_LAYERS, cpi->oxcf.enable_auto_arf);
-
-    // Adapt the intra_error factor to active_max_gf_interval limit.
-    for (i = active_max_gf_interval; i > 0; i >>= 1) ++layers;
-
-    layers = VPXMIN(max_layers, layers);
-    gop_intra_factor += (layers * 0.25);
-  }
-
-  i = 0;
-  while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
-    ++i;
-
-    // Accumulate error score of frames in this gf group.
-    mod_frame_err =
-        calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
-    gf_group_err += mod_frame_err;
-    gf_group_raw_error += this_frame->coded_error;
-    gf_group_noise += this_frame->frame_noise_energy;
-    gf_group_skip_pct += this_frame->intra_skip_pct;
-    gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
-    gf_group_inter += this_frame->pcnt_inter;
-    gf_group_motion += this_frame->pcnt_motion;
-
-    if (EOF == input_stats(twopass, &next_frame)) break;
-
     // Test for the case where there is a brief flash but the prediction
     // quality back to an earlier frame is then restored.
-    flash_detected = detect_flash(twopass, 0);
+    next_next_frame = fps_get_frame_stats(
+        first_pass_info, gf_start_show_idx + gop_coding_frames + 1);
+    flash_detected = detect_flash_from_frame_stats(next_next_frame);
 
     // Update the motion related elements to the boost calculation.
     accumulate_frame_motion_stats(
-        &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
+        next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
         &abs_mv_in_out_accumulator, &mv_ratio_accumulator);
 
     // Monitor for static sections.
-    if ((rc->frames_since_key + i - 1) > 1) {
-      zero_motion_accumulator = VPXMIN(
-          zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+    if ((rc->frames_since_key + gop_coding_frames - 1) > 1) {
+      zero_motion_accumulator =
+          VPXMIN(zero_motion_accumulator,
+                 get_zero_motion_factor(frame_info, next_frame));
     }
 
     // Accumulate the effect of prediction quality decay.
     if (!flash_detected) {
-      last_loop_decay_rate = loop_decay_rate;
-      loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+      double last_loop_decay_rate = loop_decay_rate;
+      loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame);
 
       // Break clause to detect very still sections after motion. For example,
       // a static image after a fade or other transition.
-      if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
-                                     last_loop_decay_rate)) {
-        allow_alt_ref = 0;
-        break;
+      if (gop_coding_frames > rc->min_gf_interval && loop_decay_rate >= 0.999 &&
+          last_loop_decay_rate < 0.9) {
+        int still_interval = 5;
+        if (check_transition_to_still(first_pass_info,
+                                      gf_start_show_idx + gop_coding_frames,
+                                      still_interval)) {
+          *use_alt_ref = 0;
+          break;
+        }
       }
 
       // Update the accumulator for second ref error difference.
       // This is intended to give an indication of how much the coded error is
       // increasing over time.
-      if (i == 1) {
-        sr_accumulator += next_frame.coded_error;
+      if (gop_coding_frames == 1) {
+        sr_accumulator += next_frame->coded_error;
       } else {
-        sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error);
+        sr_accumulator +=
+            (next_frame->sr_coded_error - next_frame->coded_error);
       }
     }
 
     // Break out conditions.
-    // Break at maximum of active_max_gf_interval unless almost totally static.
+    // Break at maximum of active_gf_interval->max unless almost totally
+    // static.
     //
     // Note that the addition of a test of rc->source_alt_ref_active is
     // deliberate. The effect of this is that after a normal altref group even
@@ -2612,59 +2549,230 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     // such as a fade, the arf group spanning the transition may not be coded
     // at a very high quality and hence this frame (with its overlay) is a
     // poor golden frame to use for an extended group.
-    if (((i >= active_max_gf_interval) &&
-         ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) ||
-        (
-            // Don't break out with a very short interval.
-            (i >= active_min_gf_interval) &&
-            // If possible dont break very close to a kf
-            ((rc->frames_to_key - i) >= rc->min_gf_interval) && (i & 0x01) &&
-            (!flash_detected) &&
-            ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
-             (abs_mv_in_out_accumulator > abs_mv_in_out_thresh) ||
-             (sr_accumulator > gop_intra_factor * next_frame.intra_error)))) {
+    if ((gop_coding_frames >= active_gf_interval->max) &&
+        ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) {
+      break;
+    }
+    if (
+        // Don't break out with a very short interval.
+        (gop_coding_frames >= active_gf_interval->min) &&
+        // If possible dont break very close to a kf
+        ((rc->frames_to_key - gop_coding_frames) >= rc->min_gf_interval) &&
+        (gop_coding_frames & 0x01) && (!flash_detected) &&
+        ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
+         (abs_mv_in_out_accumulator > ARF_ABS_ZOOM_THRESH) ||
+         (sr_accumulator > gop_intra_factor * next_frame->intra_error))) {
       break;
     }
+  }
+  *use_alt_ref &= zero_motion_accumulator < 0.995;
+  *use_alt_ref &= gop_coding_frames < lag_in_frames;
+  *use_alt_ref &= gop_coding_frames >= rc->min_gf_interval;
+  return gop_coding_frames;
+}
+
+static RANGE get_active_gf_inverval_range(
+    const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf,
+    int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) {
+  RANGE active_gf_interval;
+#if CONFIG_RATE_CTRL
+  (void)frame_info;
+  (void)gf_start_show_idx;
+  (void)active_worst_quality;
+  (void)last_boosted_qindex;
+  active_gf_interval.min = rc->min_gf_interval + arf_active_or_kf + 2;
 
-    *this_frame = next_frame;
+  active_gf_interval.max = 16 + arf_active_or_kf;
+
+  if ((active_gf_interval.max <= rc->frames_to_key) &&
+      (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) {
+    active_gf_interval.min = rc->frames_to_key / 2;
+    active_gf_interval.max = rc->frames_to_key / 2;
+  }
+#else
+  int int_max_q = (int)(vp9_convert_qindex_to_q(active_worst_quality,
+                                                frame_info->bit_depth));
+  int q_term = (gf_start_show_idx == 0)
+                   ? int_max_q / 32
+                   : (int)(vp9_convert_qindex_to_q(last_boosted_qindex,
+                                                   frame_info->bit_depth) /
+                           6);
+  active_gf_interval.min =
+      rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200);
+  active_gf_interval.min =
+      VPXMIN(active_gf_interval.min, rc->max_gf_interval + arf_active_or_kf);
+
+  // The value chosen depends on the active Q range. At low Q we have
+  // bits to spare and are better with a smaller interval and smaller boost.
+  // At high Q when there are few bits to spare we are better with a longer
+  // interval to spread the cost of the GF.
+  active_gf_interval.max = 11 + arf_active_or_kf + VPXMIN(5, q_term);
+
+  // Force max GF interval to be odd.
+  active_gf_interval.max = active_gf_interval.max | 0x01;
+
+  // We have: active_gf_interval.min <=
+  // rc->max_gf_interval + arf_active_or_kf.
+  if (active_gf_interval.max < active_gf_interval.min) {
+    active_gf_interval.max = active_gf_interval.min;
+  } else {
+    active_gf_interval.max =
+        VPXMIN(active_gf_interval.max, rc->max_gf_interval + arf_active_or_kf);
+  }
+
+  // Would the active max drop us out just before the near the next kf?
+  if ((active_gf_interval.max <= rc->frames_to_key) &&
+      (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) {
+    active_gf_interval.max = rc->frames_to_key / 2;
+  }
+  active_gf_interval.max =
+      VPXMAX(active_gf_interval.max, active_gf_interval.min);
+#endif
+  return active_gf_interval;
+}
+
+static int get_arf_layers(int multi_layer_arf, int max_layers,
+                          int coding_frame_num) {
+  assert(max_layers <= MAX_ARF_LAYERS);
+  if (multi_layer_arf) {
+    int layers = 0;
+    int i;
+    for (i = coding_frame_num; i > 0; i >>= 1) {
+      ++layers;
+    }
+    layers = VPXMIN(max_layers, layers);
+    return layers;
+  } else {
+    return 1;
+  }
+}
+
+static void define_gf_group(VP9_COMP *cpi, int gf_start_show_idx) {
+  VP9_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
+  VP9EncoderConfig *const oxcf = &cpi->oxcf;
+  TWO_PASS *const twopass = &cpi->twopass;
+  const FRAME_INFO *frame_info = &cpi->frame_info;
+  const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
+  const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
+  int gop_coding_frames;
+
+  double gf_group_err = 0.0;
+  double gf_group_raw_error = 0.0;
+  double gf_group_noise = 0.0;
+  double gf_group_skip_pct = 0.0;
+  double gf_group_inactive_zone_rows = 0.0;
+  double gf_group_inter = 0.0;
+  double gf_group_motion = 0.0;
+
+  int allow_alt_ref = is_altref_enabled(cpi);
+  int use_alt_ref;
+
+  int64_t gf_group_bits;
+  int gf_arf_bits;
+  const int is_key_frame = frame_is_intra_only(cm);
+  // If this is a key frame or the overlay from a previous arf then
+  // the error score / cost of this frame has already been accounted for.
+  const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
+  int is_alt_ref_flash = 0;
+
+  double gop_intra_factor;
+  int gop_frames;
+  RANGE active_gf_interval;
+
+  // Reset the GF group data structures unless this is a key
+  // frame in which case it will already have been done.
+  if (is_key_frame == 0) {
+    vp9_zero(twopass->gf_group);
+  }
+
+  vpx_clear_system_state();
+
+  active_gf_interval = get_active_gf_inverval_range(
+      frame_info, rc, arf_active_or_kf, gf_start_show_idx,
+      twopass->active_worst_quality, rc->last_boosted_qindex);
+
+  if (cpi->multi_layer_arf) {
+    int arf_layers = get_arf_layers(cpi->multi_layer_arf, oxcf->enable_auto_arf,
+                                    active_gf_interval.max);
+    gop_intra_factor = 1.0 + 0.25 * arf_layers;
+  } else {
+    gop_intra_factor = 1.0;
+  }
+
+  {
+    gop_coding_frames = get_gop_coding_frame_num(
+        &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx,
+        &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames);
+    use_alt_ref &= allow_alt_ref;
   }
 
   // Was the group length constrained by the requirement for a new KF?
-  rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
+  rc->constrained_gf_group = (gop_coding_frames >= rc->frames_to_key) ? 1 : 0;
 
   // Should we use the alternate reference frame.
-  if ((zero_motion_accumulator < 0.995) && allow_alt_ref &&
-      (twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) &&
-      (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) {
-    const int forward_frames = (rc->frames_to_key - i >= i - 1)
-                                   ? i - 1
-                                   : VPXMAX(0, rc->frames_to_key - i);
+  if (use_alt_ref) {
+    const int f_frames =
+        (rc->frames_to_key - gop_coding_frames >= gop_coding_frames - 1)
+            ? gop_coding_frames - 1
+            : VPXMAX(0, rc->frames_to_key - gop_coding_frames);
+    const int b_frames = gop_coding_frames - 1;
+    const int avg_inter_frame_qindex = rc->avg_frame_qindex[INTER_FRAME];
+    // TODO(angiebird): figure out why arf's location is assigned this way
+    const int arf_show_idx = VPXMIN(gf_start_show_idx + gop_coding_frames + 1,
+                                    fps_get_num_frames(first_pass_info));
 
     // Calculate the boost for alt ref.
-    rc->gfu_boost = calc_arf_boost(cpi, forward_frames, (i - 1));
+    rc->gfu_boost =
+        compute_arf_boost(frame_info, first_pass_info, arf_show_idx, f_frames,
+                          b_frames, avg_inter_frame_qindex);
     rc->source_alt_ref_pending = 1;
   } else {
-    reset_fpf_position(twopass, start_pos);
-    rc->gfu_boost = VPXMIN(MAX_GF_BOOST, calc_arf_boost(cpi, (i - 1), 0));
+    const int f_frames = gop_coding_frames - 1;
+    const int b_frames = 0;
+    const int avg_inter_frame_qindex = rc->avg_frame_qindex[INTER_FRAME];
+    // TODO(angiebird): figure out why arf's location is assigned this way
+    const int gld_show_idx =
+        VPXMIN(gf_start_show_idx + 1, fps_get_num_frames(first_pass_info));
+    const int arf_boost =
+        compute_arf_boost(frame_info, first_pass_info, gld_show_idx, f_frames,
+                          b_frames, avg_inter_frame_qindex);
+    rc->gfu_boost = VPXMIN(MAX_GF_BOOST, arf_boost);
     rc->source_alt_ref_pending = 0;
   }
 
 #define LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR 0.2
   rc->arf_active_best_quality_adjustment_factor = 1.0;
-  if (rc->source_alt_ref_pending && !is_lossless_requested(&cpi->oxcf) &&
-      rc->frames_to_key <= rc->arf_active_best_quality_adjustment_window) {
-    rc->arf_active_best_quality_adjustment_factor =
-        LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR +
-        (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) *
-            (rc->frames_to_key - i) /
-            VPXMAX(1, (rc->arf_active_best_quality_adjustment_window - i));
+  rc->arf_increase_active_best_quality = 0;
+
+  if (!is_lossless_requested(&cpi->oxcf)) {
+    if (rc->frames_since_key >= rc->frames_to_key) {
+      // Increase the active best quality in the second half of key frame
+      // interval.
+      rc->arf_active_best_quality_adjustment_factor =
+          LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR +
+          (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) *
+              (rc->frames_to_key - gop_coding_frames) /
+              (VPXMAX(1, ((rc->frames_to_key + rc->frames_since_key) / 2 -
+                          gop_coding_frames)));
+      rc->arf_increase_active_best_quality = 1;
+    } else if ((rc->frames_to_key - gop_coding_frames) > 0) {
+      // Reduce the active best quality in the first half of key frame interval.
+      rc->arf_active_best_quality_adjustment_factor =
+          LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR +
+          (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) *
+              (rc->frames_since_key + gop_coding_frames) /
+              (VPXMAX(1, (rc->frames_to_key + rc->frames_since_key) / 2 +
+                             gop_coding_frames));
+      rc->arf_increase_active_best_quality = -1;
+    }
   }
 
 #ifdef AGGRESSIVE_VBR
   // Limit maximum boost based on interval length.
-  rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 140);
+  rc->gfu_boost = VPXMIN((int)rc->gfu_boost, gop_coding_frames * 140);
 #else
-  rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 200);
+  rc->gfu_boost = VPXMIN((int)rc->gfu_boost, gop_coding_frames * 200);
 #endif
 
   // Cap the ARF boost when perceptual quality AQ mode is enabled. This is
@@ -2674,14 +2782,34 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   if (oxcf->aq_mode == PERCEPTUAL_AQ)
     rc->gfu_boost = VPXMIN(rc->gfu_boost, MIN_ARF_GF_BOOST);
 
-  rc->baseline_gf_interval = i - rc->source_alt_ref_pending;
-
-  // Reset the file position.
-  reset_fpf_position(twopass, start_pos);
+  rc->baseline_gf_interval = gop_coding_frames - rc->source_alt_ref_pending;
 
   if (rc->source_alt_ref_pending)
     is_alt_ref_flash = detect_flash(twopass, rc->baseline_gf_interval);
 
+  {
+    const double av_err = get_distribution_av_err(cpi, twopass);
+    const double mean_mod_score = twopass->mean_mod_score;
+    // If the first frame is a key frame or the overlay from a previous arf then
+    // the error score / cost of this frame has already been accounted for.
+    int start_idx = arf_active_or_kf ? 1 : 0;
+    int j;
+    for (j = start_idx; j < gop_coding_frames; ++j) {
+      int show_idx = gf_start_show_idx + j;
+      const FIRSTPASS_STATS *frame_stats =
+          fps_get_frame_stats(first_pass_info, show_idx);
+      // Accumulate error score of frames in this gf group.
+      gf_group_err += calc_norm_frame_score(oxcf, frame_info, frame_stats,
+                                            mean_mod_score, av_err);
+      gf_group_raw_error += frame_stats->coded_error;
+      gf_group_noise += frame_stats->frame_noise_energy;
+      gf_group_skip_pct += frame_stats->intra_skip_pct;
+      gf_group_inactive_zone_rows += frame_stats->inactive_zone_rows;
+      gf_group_inter += frame_stats->pcnt_inter;
+      gf_group_motion += frame_stats->pcnt_motion;
+    }
+  }
+
   // Calculate the bits to be allocated to the gf/arf group as a whole
   gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
 
@@ -2847,17 +2975,23 @@ static int intra_step_transition(const FIRSTPASS_STATS *this_frame,
 // Test for very low intra complexity which could cause false key frames
 #define V_LOW_INTRA 0.5
 
-static int test_candidate_kf(TWO_PASS *twopass,
-                             const FIRSTPASS_STATS *last_frame,
-                             const FIRSTPASS_STATS *this_frame,
-                             const FIRSTPASS_STATS *next_frame) {
+static int test_candidate_kf(const FIRST_PASS_INFO *first_pass_info,
+                             int show_idx) {
+  const FIRSTPASS_STATS *last_frame =
+      fps_get_frame_stats(first_pass_info, show_idx - 1);
+  const FIRSTPASS_STATS *this_frame =
+      fps_get_frame_stats(first_pass_info, show_idx);
+  const FIRSTPASS_STATS *next_frame =
+      fps_get_frame_stats(first_pass_info, show_idx + 1);
   int is_viable_kf = 0;
   double pcnt_intra = 1.0 - this_frame->pcnt_inter;
 
   // Does the frame satisfy the primary criteria of a key frame?
   // See above for an explanation of the test criteria.
   // If so, then examine how well it predicts subsequent frames.
-  if (!detect_flash(twopass, -1) && !detect_flash(twopass, 0) &&
+  detect_flash_from_frame_stats(next_frame);
+  if (!detect_flash_from_frame_stats(this_frame) &&
+      !detect_flash_from_frame_stats(next_frame) &&
       (this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
       ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
        (slide_transition(this_frame, last_frame, next_frame)) ||
@@ -2870,42 +3004,41 @@ static int test_candidate_kf(TWO_PASS *twopass,
           DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) <
          KF_II_ERR_THRESHOLD)))) {
     int i;
-    const FIRSTPASS_STATS *start_pos = twopass->stats_in;
-    FIRSTPASS_STATS local_next_frame = *next_frame;
     double boost_score = 0.0;
     double old_boost_score = 0.0;
     double decay_accumulator = 1.0;
 
     // Examine how well the key frame predicts subsequent frames.
     for (i = 0; i < 16; ++i) {
-      double next_iiratio = (II_FACTOR * local_next_frame.intra_error /
-                             DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error));
+      const FIRSTPASS_STATS *frame_stats =
+          fps_get_frame_stats(first_pass_info, show_idx + 1 + i);
+      double next_iiratio = (II_FACTOR * frame_stats->intra_error /
+                             DOUBLE_DIVIDE_CHECK(frame_stats->coded_error));
 
       if (next_iiratio > KF_II_MAX) next_iiratio = KF_II_MAX;
 
       // Cumulative effect of decay in prediction quality.
-      if (local_next_frame.pcnt_inter > 0.85)
-        decay_accumulator *= local_next_frame.pcnt_inter;
+      if (frame_stats->pcnt_inter > 0.85)
+        decay_accumulator *= frame_stats->pcnt_inter;
       else
-        decay_accumulator *= (0.85 + local_next_frame.pcnt_inter) / 2.0;
+        decay_accumulator *= (0.85 + frame_stats->pcnt_inter) / 2.0;
 
       // Keep a running total.
       boost_score += (decay_accumulator * next_iiratio);
 
       // Test various breakout clauses.
-      if ((local_next_frame.pcnt_inter < 0.05) || (next_iiratio < 1.5) ||
-          (((local_next_frame.pcnt_inter - local_next_frame.pcnt_neutral) <
-            0.20) &&
+      if ((frame_stats->pcnt_inter < 0.05) || (next_iiratio < 1.5) ||
+          (((frame_stats->pcnt_inter - frame_stats->pcnt_neutral) < 0.20) &&
            (next_iiratio < 3.0)) ||
           ((boost_score - old_boost_score) < 3.0) ||
-          (local_next_frame.intra_error < V_LOW_INTRA)) {
+          (frame_stats->intra_error < V_LOW_INTRA)) {
         break;
       }
 
       old_boost_score = boost_score;
 
       // Get the next frame details
-      if (EOF == input_stats(twopass, &local_next_frame)) break;
+      if (show_idx + 1 + i == fps_get_num_frames(first_pass_info) - 1) break;
     }
 
     // If there is tolerable prediction for at least the next 3 frames then
@@ -2913,9 +3046,6 @@ static int test_candidate_kf(TWO_PASS *twopass,
     if (boost_score > 30.0 && (i > 3)) {
       is_viable_kf = 1;
     } else {
-      // Reset the file position
-      reset_fpf_position(twopass, start_pos);
-
       is_viable_kf = 0;
     }
   }
@@ -2938,19 +3068,80 @@ static int test_candidate_kf(TWO_PASS *twopass,
 #define MAX_KF_TOT_BOOST 5400
 #endif
 
-static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
-  int i, j;
+int vp9_get_frames_to_next_key(const VP9EncoderConfig *oxcf,
+                               const FRAME_INFO *frame_info,
+                               const FIRST_PASS_INFO *first_pass_info,
+                               int kf_show_idx, int min_gf_interval) {
+  double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
+  int j;
+  int frames_to_key;
+  int max_frames_to_key = first_pass_info->num_frames - kf_show_idx;
+  max_frames_to_key = VPXMIN(max_frames_to_key, oxcf->key_freq);
+
+  // Initialize the decay rates for the recent frames to check
+  for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0;
+  // Find the next keyframe.
+  if (!oxcf->auto_key) {
+    frames_to_key = max_frames_to_key;
+  } else {
+    frames_to_key = 1;
+    while (frames_to_key < max_frames_to_key) {
+      // Provided that we are not at the end of the file...
+      if (kf_show_idx + frames_to_key + 1 < first_pass_info->num_frames) {
+        double loop_decay_rate;
+        double decay_accumulator;
+        const FIRSTPASS_STATS *next_frame = fps_get_frame_stats(
+            first_pass_info, kf_show_idx + frames_to_key + 1);
+
+        // Check for a scene cut.
+        if (test_candidate_kf(first_pass_info, kf_show_idx + frames_to_key))
+          break;
+
+        // How fast is the prediction quality decaying?
+        loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame);
+
+        // We want to know something about the recent past... rather than
+        // as used elsewhere where we are concerned with decay in prediction
+        // quality since the last GF or KF.
+        recent_loop_decay[(frames_to_key - 1) % FRAMES_TO_CHECK_DECAY] =
+            loop_decay_rate;
+        decay_accumulator = 1.0;
+        for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j)
+          decay_accumulator *= recent_loop_decay[j];
+
+        // Special check for transition or high motion followed by a
+        // static scene.
+        if ((frames_to_key - 1) > min_gf_interval && loop_decay_rate >= 0.999 &&
+            decay_accumulator < 0.9) {
+          int still_interval = oxcf->key_freq - (frames_to_key - 1);
+          // TODO(angiebird): Figure out why we use "+1" here
+          int show_idx = kf_show_idx + frames_to_key;
+          if (check_transition_to_still(first_pass_info, show_idx,
+                                        still_interval)) {
+            break;
+          }
+        }
+      }
+      ++frames_to_key;
+    }
+  }
+  return frames_to_key;
+}
+
+static void find_next_key_frame(VP9_COMP *cpi, int kf_show_idx) {
+  int i;
   RATE_CONTROL *const rc = &cpi->rc;
   TWO_PASS *const twopass = &cpi->twopass;
   GF_GROUP *const gf_group = &twopass->gf_group;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
-  const FIRSTPASS_STATS first_frame = *this_frame;
+  const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
+  const FRAME_INFO *frame_info = &cpi->frame_info;
   const FIRSTPASS_STATS *const start_position = twopass->stats_in;
+  const FIRSTPASS_STATS *keyframe_stats =
+      fps_get_frame_stats(first_pass_info, kf_show_idx);
   FIRSTPASS_STATS next_frame;
-  FIRSTPASS_STATS last_frame;
   int kf_bits = 0;
   int64_t max_kf_bits;
-  double decay_accumulator = 1.0;
   double zero_motion_accumulator = 1.0;
   double zero_motion_sum = 0.0;
   double zero_motion_avg;
@@ -2962,10 +3153,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   double kf_mod_err = 0.0;
   double kf_raw_err = 0.0;
   double kf_group_err = 0.0;
-  double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
   double sr_accumulator = 0.0;
   double abs_mv_in_out_accumulator = 0.0;
   const double av_err = get_distribution_av_err(cpi, twopass);
+  const double mean_mod_score = twopass->mean_mod_score;
   vp9_zero(next_frame);
 
   cpi->common.frame_type = KEY_FRAME;
@@ -2989,96 +3180,29 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   twopass->kf_group_bits = 0;          // Total bits available to kf group
   twopass->kf_group_error_left = 0.0;  // Group modified error score.
 
-  kf_raw_err = this_frame->intra_error;
-  kf_mod_err =
-      calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
-
-  // Initialize the decay rates for the recent frames to check
-  for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0;
-
-  // Find the next keyframe.
-  i = 0;
-  while (twopass->stats_in < twopass->stats_in_end &&
-         rc->frames_to_key < cpi->oxcf.key_freq) {
-    // Accumulate kf group error.
-    kf_group_err +=
-        calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
-
-    // Load the next frame's stats.
-    last_frame = *this_frame;
-    input_stats(twopass, this_frame);
+  kf_raw_err = keyframe_stats->intra_error;
+  kf_mod_err = calc_norm_frame_score(oxcf, frame_info, keyframe_stats,
+                                     mean_mod_score, av_err);
 
-    // Provided that we are not at the end of the file...
-    if (cpi->oxcf.auto_key && twopass->stats_in < twopass->stats_in_end) {
-      double loop_decay_rate;
-
-      // Check for a scene cut.
-      if (test_candidate_kf(twopass, &last_frame, this_frame,
-                            twopass->stats_in))
-        break;
-
-      // How fast is the prediction quality decaying?
-      loop_decay_rate = get_prediction_decay_rate(cpi, twopass->stats_in);
-
-      // We want to know something about the recent past... rather than
-      // as used elsewhere where we are concerned with decay in prediction
-      // quality since the last GF or KF.
-      recent_loop_decay[i % FRAMES_TO_CHECK_DECAY] = loop_decay_rate;
-      decay_accumulator = 1.0;
-      for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j)
-        decay_accumulator *= recent_loop_decay[j];
-
-      // Special check for transition or high motion followed by a
-      // static scene.
-      if (detect_transition_to_still(cpi, i, cpi->oxcf.key_freq - i,
-                                     loop_decay_rate, decay_accumulator))
-        break;
-
-      // Step on to the next frame.
-      ++rc->frames_to_key;
-
-      // If we don't have a real key frame within the next two
-      // key_freq intervals then break out of the loop.
-      if (rc->frames_to_key >= 2 * cpi->oxcf.key_freq) break;
-    } else {
-      ++rc->frames_to_key;
-    }
-    ++i;
-  }
+  rc->frames_to_key = vp9_get_frames_to_next_key(
+      oxcf, frame_info, first_pass_info, kf_show_idx, rc->min_gf_interval);
 
   // If there is a max kf interval set by the user we must obey it.
   // We already breakout of the loop above at 2x max.
   // This code centers the extra kf if the actual natural interval
   // is between 1x and 2x.
-  if (cpi->oxcf.auto_key && rc->frames_to_key > cpi->oxcf.key_freq) {
-    FIRSTPASS_STATS tmp_frame = first_frame;
-
-    rc->frames_to_key /= 2;
-
-    // Reset to the start of the group.
-    reset_fpf_position(twopass, start_position);
-
-    kf_group_err = 0.0;
-
-    // Rescan to get the correct error data for the forced kf group.
-    for (i = 0; i < rc->frames_to_key; ++i) {
-      kf_group_err +=
-          calculate_norm_frame_score(cpi, twopass, oxcf, &tmp_frame, av_err);
-      input_stats(twopass, &tmp_frame);
-    }
-    rc->next_key_frame_forced = 1;
-  } else if (twopass->stats_in == twopass->stats_in_end ||
-             rc->frames_to_key >= cpi->oxcf.key_freq) {
+  if (rc->frames_to_key >= cpi->oxcf.key_freq) {
     rc->next_key_frame_forced = 1;
   } else {
     rc->next_key_frame_forced = 0;
   }
 
-  // Special case for the last key frame of the file.
-  if (twopass->stats_in >= twopass->stats_in_end) {
+  for (i = 0; i < rc->frames_to_key; ++i) {
+    const FIRSTPASS_STATS *frame_stats =
+        fps_get_frame_stats(first_pass_info, kf_show_idx + i);
     // Accumulate kf group error.
-    kf_group_err +=
-        calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
+    kf_group_err += calc_norm_frame_score(oxcf, frame_info, frame_stats,
+                                          mean_mod_score, av_err);
   }
 
   // Calculate the number of bits that should be assigned to the kf group.
@@ -3103,9 +3227,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   }
   twopass->kf_group_bits = VPXMAX(0, twopass->kf_group_bits);
 
-  // Reset the first pass file position.
-  reset_fpf_position(twopass, start_position);
-
   // Scan through the kf group collating various stats used to determine
   // how many bits to spend on it.
   boost_score = 0.0;
@@ -3144,8 +3265,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
       // Monitor for static sections.
       // First frame in kf group the second ref indicator is invalid.
       if (i > 0) {
-        zero_motion_accumulator = VPXMIN(
-            zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+        zero_motion_accumulator =
+            VPXMIN(zero_motion_accumulator,
+                   get_zero_motion_factor(&cpi->frame_info, &next_frame));
       } else {
         zero_motion_accumulator =
             next_frame.pcnt_inter - next_frame.pcnt_motion;
@@ -3214,6 +3336,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   gf_group->bit_allocation[0] = kf_bits;
   gf_group->update_type[0] = KF_UPDATE;
   gf_group->rf_level[0] = KF_STD;
+  gf_group->layer_depth[0] = 0;
 
   // Note the total error score of the kf group minus the key frame itself.
   twopass->kf_group_error_left = (kf_group_err - kf_mod_err);
@@ -3227,11 +3350,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     // Default to normal-sized frame on keyframes.
     cpi->rc.next_frame_size_selector = UNSCALED;
   }
-#define ARF_ACTIVE_BEST_QUALITY_ADJUSTMENT_WINDOW_SIZE 64
-  // TODO(ravi.chaudhary@ittiam.com): Experiment without the below min
-  // condition. This might be helpful for small key frame intervals.
-  rc->arf_active_best_quality_adjustment_window =
-      VPXMIN(ARF_ACTIVE_BEST_QUALITY_ADJUSTMENT_WINDOW_SIZE, rc->frames_to_key);
 }
 
 static int is_skippable_frame(const VP9_COMP *cpi) {
@@ -3259,6 +3377,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   TWO_PASS *const twopass = &cpi->twopass;
   GF_GROUP *const gf_group = &twopass->gf_group;
   FIRSTPASS_STATS this_frame;
+  const int show_idx = cm->current_video_frame;
 
   if (!twopass->stats_in) return;
 
@@ -3341,18 +3460,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
 
   // Keyframe and section processing.
   if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
-    FIRSTPASS_STATS this_frame_copy;
-    this_frame_copy = this_frame;
     // Define next KF group and assign bits to it.
-    find_next_key_frame(cpi, &this_frame);
-    this_frame = this_frame_copy;
+    find_next_key_frame(cpi, show_idx);
   } else {
     cm->frame_type = INTER_FRAME;
   }
 
   // Define a new GF/ARF group. (Should always enter here for key frames).
   if (rc->frames_till_gf_update_due == 0) {
-    define_gf_group(cpi, &this_frame);
+    define_gf_group(cpi, show_idx);
 
     rc->frames_till_gf_update_due = rc->baseline_gf_interval;
 
@@ -3509,3 +3625,70 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
     }
   }
 }
+
+#if CONFIG_RATE_CTRL
+// Under CONFIG_RATE_CTRL, once the first_pass_info is ready, the number of
+// coding frames (including show frame and alt ref) can be determined.
+int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf,
+                             const FRAME_INFO *frame_info,
+                             const FIRST_PASS_INFO *first_pass_info,
+                             int multi_layer_arf, int allow_alt_ref) {
+  int coding_frame_num = 0;
+  RATE_CONTROL rc;
+  RANGE active_gf_interval;
+  int arf_layers;
+  double gop_intra_factor;
+  int use_alt_ref;
+  int gop_coding_frames;
+  int gop_show_frames;
+  int show_idx = 0;
+  int arf_active_or_kf = 1;
+  rc.static_scene_max_gf_interval = 250;
+  vp9_rc_init(oxcf, 1, &rc);
+
+  while (show_idx < first_pass_info->num_frames) {
+    if (rc.frames_to_key == 0) {
+      rc.frames_to_key = vp9_get_frames_to_next_key(
+          oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval);
+      arf_active_or_kf = 1;
+    } else {
+    }
+
+    {
+      int dummy = 0;
+      active_gf_interval = get_active_gf_inverval_range(
+          frame_info, &rc, arf_active_or_kf, show_idx, dummy, dummy);
+    }
+
+    arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf,
+                                active_gf_interval.max);
+    if (multi_layer_arf) {
+      gop_intra_factor = 1.0 + 0.25 * arf_layers;
+    } else {
+      gop_intra_factor = 1.0;
+    }
+
+    gop_coding_frames = get_gop_coding_frame_num(
+        &use_alt_ref, frame_info, first_pass_info, &rc, show_idx,
+        &active_gf_interval, gop_intra_factor, oxcf->lag_in_frames);
+
+    use_alt_ref &= allow_alt_ref;
+
+    rc.source_alt_ref_active = use_alt_ref;
+    arf_active_or_kf = use_alt_ref;
+    gop_show_frames = gop_coding_frames - use_alt_ref;
+    rc.frames_to_key -= gop_show_frames;
+    rc.frames_since_key += gop_show_frames;
+    show_idx += gop_show_frames;
+    coding_frame_num += gop_show_frames + use_alt_ref;
+  }
+  return coding_frame_num;
+}
+#endif
+
+FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass) {
+  return twopass->this_frame_stats;
+}
+FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *twopass) {
+  return twopass->total_stats;
+}
diff --git a/libvpx/vp9/encoder/vp9_firstpass.h b/libvpx/vp9/encoder/vp9_firstpass.h
index a0a96e6ef..cfbc143c3 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.h
+++ b/libvpx/vp9/encoder/vp9_firstpass.h
@@ -13,6 +13,7 @@
 
 #include <assert.h>
 
+#include "vp9/common/vp9_onyxc_int.h"
 #include "vp9/encoder/vp9_lookahead.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 
@@ -147,6 +148,30 @@ typedef struct {
 } GF_GROUP;
 
 typedef struct {
+  const FIRSTPASS_STATS *stats;
+  int num_frames;
+} FIRST_PASS_INFO;
+
+static INLINE void fps_init_first_pass_info(FIRST_PASS_INFO *first_pass_info,
+                                            const FIRSTPASS_STATS *stats,
+                                            int num_frames) {
+  first_pass_info->stats = stats;
+  first_pass_info->num_frames = num_frames;
+}
+
+static INLINE int fps_get_num_frames(const FIRST_PASS_INFO *first_pass_info) {
+  return first_pass_info->num_frames;
+}
+
+static INLINE const FIRSTPASS_STATS *fps_get_frame_stats(
+    const FIRST_PASS_INFO *first_pass_info, int show_idx) {
+  if (show_idx < 0 || show_idx >= first_pass_info->num_frames) {
+    return NULL;
+  }
+  return &first_pass_info->stats[show_idx];
+}
+
+typedef struct {
   unsigned int section_intra_rating;
   unsigned int key_frame_section_intra_rating;
   FIRSTPASS_STATS total_stats;
@@ -154,6 +179,7 @@ typedef struct {
   const FIRSTPASS_STATS *stats_in;
   const FIRSTPASS_STATS *stats_in_start;
   const FIRSTPASS_STATS *stats_in_end;
+  FIRST_PASS_INFO first_pass_info;
   FIRSTPASS_STATS total_left_stats;
   int first_pass_done;
   int64_t bits_left;
@@ -192,6 +218,7 @@ typedef struct {
   int extend_maxq;
   int extend_minq_fast;
   int arnr_strength_adjustment;
+  int last_qindex_of_arf_layer[MAX_ARF_LAYERS];
 
   GF_GROUP gf_group;
 } TWO_PASS;
@@ -219,6 +246,21 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
 void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width,
                           int *scaled_frame_height);
 
+struct VP9EncoderConfig;
+int vp9_get_frames_to_next_key(const struct VP9EncoderConfig *oxcf,
+                               const FRAME_INFO *frame_info,
+                               const FIRST_PASS_INFO *first_pass_info,
+                               int kf_show_idx, int min_gf_interval);
+#if CONFIG_RATE_CTRL
+int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf,
+                             const FRAME_INFO *frame_info,
+                             const FIRST_PASS_INFO *first_pass_info,
+                             int multi_layer_arf, int allow_alt_ref);
+#endif
+
+FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *two_pass);
+FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *two_pass);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/libvpx/vp9/encoder/vp9_lookahead.c b/libvpx/vp9/encoder/vp9_lookahead.c
index 392cd5d41..97838c38e 100644
--- a/libvpx/vp9/encoder/vp9_lookahead.c
+++ b/libvpx/vp9/encoder/vp9_lookahead.c
@@ -64,6 +64,7 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
     unsigned int i;
     ctx->max_sz = depth;
     ctx->buf = calloc(depth, sizeof(*ctx->buf));
+    ctx->next_show_idx = 0;
     if (!ctx->buf) goto bail;
     for (i = 0; i < depth; i++)
       if (vpx_alloc_frame_buffer(
@@ -81,12 +82,16 @@ bail:
 }
 
 #define USE_PARTIAL_COPY 0
+int vp9_lookahead_full(const struct lookahead_ctx *ctx) {
+  return ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz;
+}
+
+int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx) {
+  return ctx->next_show_idx;
+}
 
 int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
-                       int64_t ts_start, int64_t ts_end,
-#if CONFIG_VP9_HIGHBITDEPTH
-                       int use_highbitdepth,
-#endif
+                       int64_t ts_start, int64_t ts_end, int use_highbitdepth,
                        vpx_enc_frame_flags_t flags) {
   struct lookahead_entry *buf;
 #if USE_PARTIAL_COPY
@@ -101,8 +106,12 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
   int subsampling_x = src->subsampling_x;
   int subsampling_y = src->subsampling_y;
   int larger_dimensions, new_dimensions;
+#if !CONFIG_VP9_HIGHBITDEPTH
+  (void)use_highbitdepth;
+  assert(use_highbitdepth == 0);
+#endif
 
-  if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1;
+  if (vp9_lookahead_full(ctx)) return 1;
   ctx->sz++;
   buf = pop(ctx, &ctx->write_idx);
 
@@ -184,6 +193,8 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
   buf->ts_start = ts_start;
   buf->ts_end = ts_end;
   buf->flags = flags;
+  buf->show_idx = ctx->next_show_idx;
+  ++ctx->next_show_idx;
   return 0;
 }
 
diff --git a/libvpx/vp9/encoder/vp9_lookahead.h b/libvpx/vp9/encoder/vp9_lookahead.h
index c627bede2..dbbe3af58 100644
--- a/libvpx/vp9/encoder/vp9_lookahead.h
+++ b/libvpx/vp9/encoder/vp9_lookahead.h
@@ -25,6 +25,7 @@ struct lookahead_entry {
   YV12_BUFFER_CONFIG img;
   int64_t ts_start;
   int64_t ts_end;
+  int show_idx; /*The show_idx of this frame*/
   vpx_enc_frame_flags_t flags;
 };
 
@@ -32,10 +33,12 @@ struct lookahead_entry {
 #define MAX_PRE_FRAMES 1
 
 struct lookahead_ctx {
-  int max_sz;                  /* Absolute size of the queue */
-  int sz;                      /* Number of buffers currently in the queue */
-  int read_idx;                /* Read index */
-  int write_idx;               /* Write index */
+  int max_sz;        /* Absolute size of the queue */
+  int sz;            /* Number of buffers currently in the queue */
+  int read_idx;      /* Read index */
+  int write_idx;     /* Write index */
+  int next_show_idx; /* The show_idx that will be assigned to the next frame
+                        being pushed in the queue*/
   struct lookahead_entry *buf; /* Buffer list */
 };
 
@@ -57,6 +60,23 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
  */
 void vp9_lookahead_destroy(struct lookahead_ctx *ctx);
 
+/**\brief Check if lookahead is full
+ *
+ * \param[in] ctx         Pointer to the lookahead context
+ *
+ * Return 1 if lookahead is full, otherwise return 0.
+ */
+int vp9_lookahead_full(const struct lookahead_ctx *ctx);
+
+/**\brief Return the next_show_idx
+ *
+ * \param[in] ctx         Pointer to the lookahead context
+ *
+ * Return the show_idx that will be assigned to the next
+ * frame pushed by vp9_lookahead_push()
+ */
+int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx);
+
 /**\brief Enqueue a source buffer
  *
  * This function will copy the source image into a new framebuffer with
@@ -73,10 +93,7 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx);
  * \param[in] active_map  Map that specifies which macroblock is active
  */
 int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
-                       int64_t ts_start, int64_t ts_end,
-#if CONFIG_VP9_HIGHBITDEPTH
-                       int use_highbitdepth,
-#endif
+                       int64_t ts_start, int64_t ts_end, int use_highbitdepth,
                        vpx_enc_frame_flags_t flags);
 
 /**\brief Get the next source buffer to encode
diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c
index d1688f993..ac29f36ec 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/libvpx/vp9/encoder/vp9_mcomp.c
@@ -1731,239 +1731,99 @@ static int exhaustive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
 #define MAX_RANGE 256
 #define MIN_INTERVAL 1
 #if CONFIG_NON_GREEDY_MV
-
-#define LOG2_TABLE_SIZE 1024
-static const int log2_table[LOG2_TABLE_SIZE] = {
-  0,  // This is a dummy value
-  0,        1048576,  1661954,  2097152,  2434718,  2710530,  2943725,
-  3145728,  3323907,  3483294,  3627477,  3759106,  3880192,  3992301,
-  4096672,  4194304,  4286015,  4372483,  4454275,  4531870,  4605679,
-  4676053,  4743299,  4807682,  4869436,  4928768,  4985861,  5040877,
-  5093962,  5145248,  5194851,  5242880,  5289431,  5334591,  5378443,
-  5421059,  5462508,  5502851,  5542146,  5580446,  5617800,  5654255,
-  5689851,  5724629,  5758625,  5791875,  5824409,  5856258,  5887450,
-  5918012,  5947969,  5977344,  6006160,  6034437,  6062195,  6089453,
-  6116228,  6142538,  6168398,  6193824,  6218829,  6243427,  6267632,
-  6291456,  6314910,  6338007,  6360756,  6383167,  6405252,  6427019,
-  6448477,  6469635,  6490501,  6511084,  6531390,  6551427,  6571202,
-  6590722,  6609993,  6629022,  6647815,  6666376,  6684713,  6702831,
-  6720734,  6738427,  6755916,  6773205,  6790299,  6807201,  6823917,
-  6840451,  6856805,  6872985,  6888993,  6904834,  6920510,  6936026,
-  6951384,  6966588,  6981641,  6996545,  7011304,  7025920,  7040397,
-  7054736,  7068940,  7083013,  7096956,  7110771,  7124461,  7138029,
-  7151476,  7164804,  7178017,  7191114,  7204100,  7216974,  7229740,
-  7242400,  7254954,  7267405,  7279754,  7292003,  7304154,  7316208,
-  7328167,  7340032,  7351805,  7363486,  7375079,  7386583,  7398000,
-  7409332,  7420579,  7431743,  7442826,  7453828,  7464751,  7475595,
-  7486362,  7497053,  7507669,  7518211,  7528680,  7539077,  7549404,
-  7559660,  7569847,  7579966,  7590017,  7600003,  7609923,  7619778,
-  7629569,  7639298,  7648964,  7658569,  7668114,  7677598,  7687023,
-  7696391,  7705700,  7714952,  7724149,  7733289,  7742375,  7751407,
-  7760385,  7769310,  7778182,  7787003,  7795773,  7804492,  7813161,
-  7821781,  7830352,  7838875,  7847350,  7855777,  7864158,  7872493,
-  7880782,  7889027,  7897226,  7905381,  7913492,  7921561,  7929586,
-  7937569,  7945510,  7953410,  7961268,  7969086,  7976864,  7984602,
-  7992301,  7999960,  8007581,  8015164,  8022709,  8030217,  8037687,
-  8045121,  8052519,  8059880,  8067206,  8074496,  8081752,  8088973,
-  8096159,  8103312,  8110431,  8117516,  8124569,  8131589,  8138576,
-  8145532,  8152455,  8159347,  8166208,  8173037,  8179836,  8186605,
-  8193343,  8200052,  8206731,  8213380,  8220001,  8226593,  8233156,
-  8239690,  8246197,  8252676,  8259127,  8265550,  8271947,  8278316,
-  8284659,  8290976,  8297266,  8303530,  8309768,  8315981,  8322168,
-  8328330,  8334467,  8340579,  8346667,  8352730,  8358769,  8364784,
-  8370775,  8376743,  8382687,  8388608,  8394506,  8400381,  8406233,
-  8412062,  8417870,  8423655,  8429418,  8435159,  8440878,  8446576,
-  8452252,  8457908,  8463542,  8469155,  8474748,  8480319,  8485871,
-  8491402,  8496913,  8502404,  8507875,  8513327,  8518759,  8524171,
-  8529564,  8534938,  8540293,  8545629,  8550947,  8556245,  8561525,
-  8566787,  8572031,  8577256,  8582464,  8587653,  8592825,  8597980,
-  8603116,  8608236,  8613338,  8618423,  8623491,  8628542,  8633576,
-  8638593,  8643594,  8648579,  8653547,  8658499,  8663434,  8668354,
-  8673258,  8678145,  8683017,  8687874,  8692715,  8697540,  8702350,
-  8707145,  8711925,  8716690,  8721439,  8726174,  8730894,  8735599,
-  8740290,  8744967,  8749628,  8754276,  8758909,  8763528,  8768134,
-  8772725,  8777302,  8781865,  8786415,  8790951,  8795474,  8799983,
-  8804478,  8808961,  8813430,  8817886,  8822328,  8826758,  8831175,
-  8835579,  8839970,  8844349,  8848715,  8853068,  8857409,  8861737,
-  8866053,  8870357,  8874649,  8878928,  8883195,  8887451,  8891694,
-  8895926,  8900145,  8904353,  8908550,  8912734,  8916908,  8921069,
-  8925220,  8929358,  8933486,  8937603,  8941708,  8945802,  8949885,
-  8953957,  8958018,  8962068,  8966108,  8970137,  8974155,  8978162,
-  8982159,  8986145,  8990121,  8994086,  8998041,  9001986,  9005920,
-  9009844,  9013758,  9017662,  9021556,  9025440,  9029314,  9033178,
-  9037032,  9040877,  9044711,  9048536,  9052352,  9056157,  9059953,
-  9063740,  9067517,  9071285,  9075044,  9078793,  9082533,  9086263,
-  9089985,  9093697,  9097400,  9101095,  9104780,  9108456,  9112123,
-  9115782,  9119431,  9123072,  9126704,  9130328,  9133943,  9137549,
-  9141146,  9144735,  9148316,  9151888,  9155452,  9159007,  9162554,
-  9166092,  9169623,  9173145,  9176659,  9180165,  9183663,  9187152,
-  9190634,  9194108,  9197573,  9201031,  9204481,  9207923,  9211357,
-  9214784,  9218202,  9221613,  9225017,  9228412,  9231800,  9235181,
-  9238554,  9241919,  9245277,  9248628,  9251971,  9255307,  9258635,
-  9261956,  9265270,  9268577,  9271876,  9275169,  9278454,  9281732,
-  9285002,  9288266,  9291523,  9294773,  9298016,  9301252,  9304481,
-  9307703,  9310918,  9314126,  9317328,  9320523,  9323711,  9326892,
-  9330067,  9333235,  9336397,  9339552,  9342700,  9345842,  9348977,
-  9352106,  9355228,  9358344,  9361454,  9364557,  9367654,  9370744,
-  9373828,  9376906,  9379978,  9383043,  9386102,  9389155,  9392202,
-  9395243,  9398278,  9401306,  9404329,  9407345,  9410356,  9413360,
-  9416359,  9419351,  9422338,  9425319,  9428294,  9431263,  9434226,
-  9437184,  9440136,  9443082,  9446022,  9448957,  9451886,  9454809,
-  9457726,  9460638,  9463545,  9466446,  9469341,  9472231,  9475115,
-  9477994,  9480867,  9483735,  9486597,  9489454,  9492306,  9495152,
-  9497993,  9500828,  9503659,  9506484,  9509303,  9512118,  9514927,
-  9517731,  9520530,  9523324,  9526112,  9528895,  9531674,  9534447,
-  9537215,  9539978,  9542736,  9545489,  9548237,  9550980,  9553718,
-  9556451,  9559179,  9561903,  9564621,  9567335,  9570043,  9572747,
-  9575446,  9578140,  9580830,  9583514,  9586194,  9588869,  9591540,
-  9594205,  9596866,  9599523,  9602174,  9604821,  9607464,  9610101,
-  9612735,  9615363,  9617987,  9620607,  9623222,  9625832,  9628438,
-  9631040,  9633637,  9636229,  9638818,  9641401,  9643981,  9646556,
-  9649126,  9651692,  9654254,  9656812,  9659365,  9661914,  9664459,
-  9666999,  9669535,  9672067,  9674594,  9677118,  9679637,  9682152,
-  9684663,  9687169,  9689672,  9692170,  9694665,  9697155,  9699641,
-  9702123,  9704601,  9707075,  9709545,  9712010,  9714472,  9716930,
-  9719384,  9721834,  9724279,  9726721,  9729159,  9731593,  9734024,
-  9736450,  9738872,  9741291,  9743705,  9746116,  9748523,  9750926,
-  9753326,  9755721,  9758113,  9760501,  9762885,  9765266,  9767642,
-  9770015,  9772385,  9774750,  9777112,  9779470,  9781825,  9784175,
-  9786523,  9788866,  9791206,  9793543,  9795875,  9798204,  9800530,
-  9802852,  9805170,  9807485,  9809797,  9812104,  9814409,  9816710,
-  9819007,  9821301,  9823591,  9825878,  9828161,  9830441,  9832718,
-  9834991,  9837261,  9839527,  9841790,  9844050,  9846306,  9848559,
-  9850808,  9853054,  9855297,  9857537,  9859773,  9862006,  9864235,
-  9866462,  9868685,  9870904,  9873121,  9875334,  9877544,  9879751,
-  9881955,  9884155,  9886352,  9888546,  9890737,  9892925,  9895109,
-  9897291,  9899469,  9901644,  9903816,  9905985,  9908150,  9910313,
-  9912473,  9914629,  9916783,  9918933,  9921080,  9923225,  9925366,
-  9927504,  9929639,  9931771,  9933900,  9936027,  9938150,  9940270,
-  9942387,  9944502,  9946613,  9948721,  9950827,  9952929,  9955029,
-  9957126,  9959219,  9961310,  9963398,  9965484,  9967566,  9969645,
-  9971722,  9973796,  9975866,  9977934,  9980000,  9982062,  9984122,
-  9986179,  9988233,  9990284,  9992332,  9994378,  9996421,  9998461,
-  10000498, 10002533, 10004565, 10006594, 10008621, 10010644, 10012665,
-  10014684, 10016700, 10018713, 10020723, 10022731, 10024736, 10026738,
-  10028738, 10030735, 10032729, 10034721, 10036710, 10038697, 10040681,
-  10042662, 10044641, 10046617, 10048591, 10050562, 10052530, 10054496,
-  10056459, 10058420, 10060379, 10062334, 10064287, 10066238, 10068186,
-  10070132, 10072075, 10074016, 10075954, 10077890, 10079823, 10081754,
-  10083682, 10085608, 10087532, 10089453, 10091371, 10093287, 10095201,
-  10097112, 10099021, 10100928, 10102832, 10104733, 10106633, 10108529,
-  10110424, 10112316, 10114206, 10116093, 10117978, 10119861, 10121742,
-  10123620, 10125495, 10127369, 10129240, 10131109, 10132975, 10134839,
-  10136701, 10138561, 10140418, 10142273, 10144126, 10145976, 10147825,
-  10149671, 10151514, 10153356, 10155195, 10157032, 10158867, 10160699,
-  10162530, 10164358, 10166184, 10168007, 10169829, 10171648, 10173465,
-  10175280, 10177093, 10178904, 10180712, 10182519, 10184323, 10186125,
-  10187925, 10189722, 10191518, 10193311, 10195103, 10196892, 10198679,
-  10200464, 10202247, 10204028, 10205806, 10207583, 10209357, 10211130,
-  10212900, 10214668, 10216435, 10218199, 10219961, 10221721, 10223479,
-  10225235, 10226989, 10228741, 10230491, 10232239, 10233985, 10235728,
-  10237470, 10239210, 10240948, 10242684, 10244417, 10246149, 10247879,
-  10249607, 10251333, 10253057, 10254779, 10256499, 10258217, 10259933,
-  10261647, 10263360, 10265070, 10266778, 10268485, 10270189, 10271892,
-  10273593, 10275292, 10276988, 10278683, 10280376, 10282068, 10283757,
-  10285444, 10287130, 10288814, 10290495, 10292175, 10293853, 10295530,
-  10297204, 10298876, 10300547, 10302216, 10303883, 10305548, 10307211,
-  10308873, 10310532, 10312190, 10313846, 10315501, 10317153, 10318804,
-  10320452, 10322099, 10323745, 10325388, 10327030, 10328670, 10330308,
-  10331944, 10333578, 10335211, 10336842, 10338472, 10340099, 10341725,
-  10343349, 10344971, 10346592, 10348210, 10349828, 10351443, 10353057,
-  10354668, 10356279, 10357887, 10359494, 10361099, 10362702, 10364304,
-  10365904, 10367502, 10369099, 10370694, 10372287, 10373879, 10375468,
-  10377057, 10378643, 10380228, 10381811, 10383393, 10384973, 10386551,
-  10388128, 10389703, 10391276, 10392848, 10394418, 10395986, 10397553,
-  10399118, 10400682, 10402244, 10403804, 10405363, 10406920, 10408476,
-  10410030, 10411582, 10413133, 10414682, 10416230, 10417776, 10419320,
-  10420863, 10422404, 10423944, 10425482, 10427019, 10428554, 10430087,
-  10431619, 10433149, 10434678, 10436206, 10437731, 10439256, 10440778,
-  10442299, 10443819, 10445337, 10446854, 10448369, 10449882, 10451394,
-  10452905, 10454414, 10455921, 10457427, 10458932, 10460435, 10461936,
-  10463436, 10464935, 10466432, 10467927, 10469422, 10470914, 10472405,
-  10473895, 10475383, 10476870, 10478355, 10479839, 10481322, 10482802,
-  10484282,
-};
-
-#define LOG2_PRECISION 20
-static int64_t log2_approximation(int64_t v) {
-  assert(v > 0);
-  if (v < LOG2_TABLE_SIZE) {
-    return log2_table[v];
-  } else {
-    // use linear approximation when v >= 2^10
-    const int slope =
-        1477;  // slope = 1 / (log(2) * 1024) * (1 << LOG2_PRECISION)
-    assert(LOG2_TABLE_SIZE == 1 << 10);
-
-    return slope * (v - LOG2_TABLE_SIZE) + (10 << LOG2_PRECISION);
-  }
-}
-
-int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
-                                 int mv_num) {
-  int i;
-  int update = 0;
-  int64_t best_cost = 0;
-  vpx_clear_system_state();
-  for (i = 0; i < mv_num; ++i) {
-    if (nb_mvs[i].as_int != INVALID_MV) {
-      MV nb_mv = nb_mvs[i].as_mv;
-      const int64_t row_diff = abs(mv->row - nb_mv.row);
-      const int64_t col_diff = abs(mv->col - nb_mv.col);
-      const int64_t cost =
-          log2_approximation(1 + row_diff * row_diff + col_diff * col_diff);
-      if (update == 0) {
-        best_cost = cost;
-        update = 1;
-      } else {
-        best_cost = cost < best_cost ? cost : best_cost;
+static int64_t exhaustive_mesh_search_multi_step(
+    MV *best_mv, const MV *center_mv, int range, int step,
+    const struct buf_2d *src, const struct buf_2d *pre, int lambda,
+    const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits,
+    const vp9_variance_fn_ptr_t *fn_ptr) {
+  int64_t best_sad;
+  int r, c;
+  int start_col, end_col, start_row, end_row;
+  *best_mv = *center_mv;
+  best_sad =
+      ((int64_t)fn_ptr->sdf(src->buf, src->stride,
+                            get_buf_from_mv(pre, center_mv), pre->stride)
+       << LOG2_PRECISION) +
+      lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
+  start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
+  start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
+  end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
+  end_col = VPXMIN(center_mv->col + range, mv_limits->col_max);
+  for (r = start_row; r <= end_row; r += step) {
+    for (c = start_col; c <= end_col; c += step) {
+      const MV mv = { r, c };
+      int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
+                                         get_buf_from_mv(pre, &mv), pre->stride)
+                    << LOG2_PRECISION;
+      if (sad < best_sad) {
+        sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+        if (sad < best_sad) {
+          best_sad = sad;
+          *best_mv = mv;
+        }
       }
     }
   }
-  return best_cost;
+  return best_sad;
 }
 
-static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
-                                          int range, int step,
-                                          const vp9_variance_fn_ptr_t *fn_ptr,
-                                          const MV *center_mv, int lambda,
-                                          const int_mv *nb_full_mvs,
-                                          int full_mv_num) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
-  MV fcenter_mv = { center_mv->row, center_mv->col };
+static int64_t exhaustive_mesh_search_single_step(
+    MV *best_mv, const MV *center_mv, int range, const struct buf_2d *src,
+    const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs,
+    int full_mv_num, const MvLimits *mv_limits,
+    const vp9_variance_fn_ptr_t *fn_ptr) {
   int64_t best_sad;
   int r, c, i;
   int start_col, end_col, start_row, end_row;
-  int col_step = (step > 1) ? step : 4;
 
-  assert(step >= 1);
-
-  clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
-           x->mv_limits.row_min, x->mv_limits.row_max);
-  *best_mv = fcenter_mv;
+  *best_mv = *center_mv;
   best_sad =
-      ((int64_t)fn_ptr->sdf(what->buf, what->stride,
-                            get_buf_from_mv(in_what, &fcenter_mv),
-                            in_what->stride)
+      ((int64_t)fn_ptr->sdf(src->buf, src->stride,
+                            get_buf_from_mv(pre, center_mv), pre->stride)
        << LOG2_PRECISION) +
-      lambda * vp9_nb_mvs_inconsistency(&fcenter_mv, nb_full_mvs, full_mv_num);
-  start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
-  start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
-  end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
-  end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
+      lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
+  start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
+  start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
+  end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
+  end_col = VPXMIN(center_mv->col + range, mv_limits->col_max);
+  for (r = start_row; r <= end_row; r += 1) {
+    c = start_col;
+    // sdx8f may not be available some block size
+    if (fn_ptr->sdx8f) {
+      while (c + 7 <= end_col) {
+        unsigned int sads[8];
+        const MV mv = { r, c };
+        const uint8_t *buf = get_buf_from_mv(pre, &mv);
+        fn_ptr->sdx8f(src->buf, src->stride, buf, pre->stride, sads);
+
+        for (i = 0; i < 8; ++i) {
+          int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
+          if (sad < best_sad) {
+            const MV mv = { r, c + i };
+            sad += lambda *
+                   vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+            if (sad < best_sad) {
+              best_sad = sad;
+              *best_mv = mv;
+            }
+          }
+        }
+        c += 8;
+      }
+    }
+    while (c + 3 <= end_col) {
+      unsigned int sads[4];
+      const uint8_t *addrs[4];
+      for (i = 0; i < 4; ++i) {
+        const MV mv = { r, c + i };
+        addrs[i] = get_buf_from_mv(pre, &mv);
+      }
+      fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads);
 
-  for (r = start_row; r <= end_row; r += step) {
-    for (c = start_col; c <= end_col; c += col_step) {
-      // Step > 1 means we are not checking every location in this pass.
-      if (step > 1) {
-        const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
-        int64_t sad =
-            (int64_t)fn_ptr->sdf(what->buf, what->stride,
-                                 get_buf_from_mv(in_what, &mv), in_what->stride)
-            << LOG2_PRECISION;
+      for (i = 0; i < 4; ++i) {
+        int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
         if (sad < best_sad) {
+          const MV mv = { r, c + i };
           sad +=
               lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
           if (sad < best_sad) {
@@ -1971,53 +1831,48 @@ static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
             *best_mv = mv;
           }
         }
-      } else {
-        // 4 sads in a single call if we are checking every location
-        if (c + 3 <= end_col) {
-          unsigned int sads[4];
-          const uint8_t *addrs[4];
-          for (i = 0; i < 4; ++i) {
-            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-            addrs[i] = get_buf_from_mv(in_what, &mv);
-          }
-          fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
-
-          for (i = 0; i < 4; ++i) {
-            int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
-            if (sad < best_sad) {
-              const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-              sad += lambda *
-                     vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
-              if (sad < best_sad) {
-                best_sad = sad;
-                *best_mv = mv;
-              }
-            }
-          }
-        } else {
-          for (i = 0; i < end_col - c; ++i) {
-            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-            int64_t sad = (int64_t)fn_ptr->sdf(what->buf, what->stride,
-                                               get_buf_from_mv(in_what, &mv),
-                                               in_what->stride)
-                          << LOG2_PRECISION;
-            if (sad < best_sad) {
-              sad += lambda *
-                     vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
-              if (sad < best_sad) {
-                best_sad = sad;
-                *best_mv = mv;
-              }
-            }
-          }
+      }
+      c += 4;
+    }
+    while (c <= end_col) {
+      const MV mv = { r, c };
+      int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
+                                         get_buf_from_mv(pre, &mv), pre->stride)
+                    << LOG2_PRECISION;
+      if (sad < best_sad) {
+        sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+        if (sad < best_sad) {
+          best_sad = sad;
+          *best_mv = mv;
         }
       }
+      c += 1;
     }
   }
-
   return best_sad;
 }
 
+static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
+                                          int range, int step,
+                                          const vp9_variance_fn_ptr_t *fn_ptr,
+                                          const MV *center_mv, int lambda,
+                                          const int_mv *nb_full_mvs,
+                                          int full_mv_num) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *src = &x->plane[0].src;
+  const struct buf_2d *pre = &xd->plane[0].pre[0];
+  assert(step >= 1);
+  assert(is_mv_in(&x->mv_limits, center_mv));
+  if (step == 1) {
+    return exhaustive_mesh_search_single_step(
+        best_mv, center_mv, range, src, pre, lambda, nb_full_mvs, full_mv_num,
+        &x->mv_limits, fn_ptr);
+  }
+  return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step, src,
+                                           pre, lambda, nb_full_mvs,
+                                           full_mv_num, &x->mv_limits, fn_ptr);
+}
+
 static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
                                          MV *centre_mv_full,
                                          const vp9_variance_fn_ptr_t *fn_ptr,
@@ -2031,7 +1886,6 @@ static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
   int interval = sf->mesh_patterns[0].interval;
   int range = sf->mesh_patterns[0].range;
   int baseline_interval_divisor;
-  const MV dummy_mv = { 0, 0 };
 
   // Trap illegal values for interval and range for this function.
   if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
@@ -2067,19 +1921,18 @@ static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
     }
   }
 
-  bestsme = vp9_get_mvpred_var(x, &temp_mv, &dummy_mv, fn_ptr, 0);
   *dst_mv = temp_mv;
 
   return bestsme;
 }
 
-static double diamond_search_sad_new(const MACROBLOCK *x,
-                                     const search_site_config *cfg,
-                                     const MV *init_full_mv, MV *best_full_mv,
-                                     int search_param, int lambda, int *num00,
-                                     const vp9_variance_fn_ptr_t *fn_ptr,
-                                     const int_mv *nb_full_mvs,
-                                     int full_mv_num) {
+static int64_t diamond_search_sad_new(const MACROBLOCK *x,
+                                      const search_site_config *cfg,
+                                      const MV *init_full_mv, MV *best_full_mv,
+                                      int search_param, int lambda, int *num00,
+                                      const vp9_variance_fn_ptr_t *fn_ptr,
+                                      const int_mv *nb_full_mvs,
+                                      int full_mv_num) {
   int i, j, step;
 
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2089,7 +1942,7 @@ static double diamond_search_sad_new(const MACROBLOCK *x,
   const int in_what_stride = xd->plane[0].pre[0].stride;
   const uint8_t *best_address;
 
-  double bestsad;
+  int64_t bestsad;
   int best_site = -1;
   int last_site = -1;
 
@@ -2116,11 +1969,11 @@ static double diamond_search_sad_new(const MACROBLOCK *x,
 
   // Check the starting position
   {
-    const double mv_dist =
-        fn_ptr->sdf(what, what_stride, in_what, in_what_stride);
-    const double mv_cost =
-        vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num) /
-        (double)(1 << LOG2_PRECISION);
+    const int64_t mv_dist =
+        (int64_t)fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+        << LOG2_PRECISION;
+    const int64_t mv_cost =
+        vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
     bestsad = mv_dist + lambda * mv_cost;
   }
 
@@ -2151,14 +2004,13 @@ static double diamond_search_sad_new(const MACROBLOCK *x,
                        sad_array);
 
         for (t = 0; t < 4; t++, i++) {
-          if (sad_array[t] < bestsad) {
+          const int64_t mv_dist = (int64_t)sad_array[t] << LOG2_PRECISION;
+          if (mv_dist < bestsad) {
             const MV this_mv = { best_full_mv->row + ss_mv[i].row,
                                  best_full_mv->col + ss_mv[i].col };
-            const double mv_dist = sad_array[t];
-            const double mv_cost =
-                vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num) /
-                (double)(1 << LOG2_PRECISION);
-            double thissad = mv_dist + lambda * mv_cost;
+            const int64_t mv_cost =
+                vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
+            const int64_t thissad = mv_dist + lambda * mv_cost;
             if (thissad < bestsad) {
               bestsad = thissad;
               best_site = i;
@@ -2174,13 +2026,14 @@ static double diamond_search_sad_new(const MACROBLOCK *x,
 
         if (is_mv_in(&x->mv_limits, &this_mv)) {
           const uint8_t *const check_here = ss_os[i] + best_address;
-          const double mv_dist =
-              fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
+          const int64_t mv_dist =
+              (int64_t)fn_ptr->sdf(what, what_stride, check_here,
+                                   in_what_stride)
+              << LOG2_PRECISION;
           if (mv_dist < bestsad) {
-            const double mv_cost =
-                vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num) /
-                (double)(1 << LOG2_PRECISION);
-            double thissad = mv_dist + lambda * mv_cost;
+            const int64_t mv_cost =
+                vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
+            const int64_t thissad = mv_dist + lambda * mv_cost;
             if (thissad < bestsad) {
               bestsad = thissad;
               best_site = i;
@@ -2202,32 +2055,30 @@ static double diamond_search_sad_new(const MACROBLOCK *x,
   return bestsad;
 }
 
-void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
-                             int mi_col, int rf_idx, BLOCK_SIZE bsize,
-                             int_mv *nb_full_mvs) {
-  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
-  const int mi_height = num_8x8_blocks_high_lookup[bsize];
+int vp9_prepare_nb_full_mvs(const MotionField *motion_field, int mi_row,
+                            int mi_col, int_mv *nb_full_mvs) {
+  const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize];
+  const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize];
   const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+  int nb_full_mv_num = 0;
   int i;
+  assert(mi_row % mi_height == 0);
+  assert(mi_col % mi_width == 0);
   for (i = 0; i < NB_MVS_NUM; ++i) {
-    int r = dirs[i][0] * mi_height;
-    int c = dirs[i][1] * mi_width;
-    if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
-        mi_col + c < tpl_frame->mi_cols) {
-      const TplDepStats *tpl_ptr =
-          &tpl_frame
-               ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];
-      int_mv *mv =
-          get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);
-      if (tpl_ptr->ready[rf_idx]) {
-        nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv);
-      } else {
-        nb_full_mvs[i].as_int = INVALID_MV;
+    int r = dirs[i][0];
+    int c = dirs[i][1];
+    int brow = mi_row / mi_height + r;
+    int bcol = mi_col / mi_width + c;
+    if (brow >= 0 && brow < motion_field->block_rows && bcol >= 0 &&
+        bcol < motion_field->block_cols) {
+      if (vp9_motion_field_is_mv_set(motion_field, brow, bcol)) {
+        int_mv mv = vp9_motion_field_get_mv(motion_field, brow, bcol);
+        nb_full_mvs[nb_full_mv_num].as_mv = get_full_mv(&mv.as_mv);
+        ++nb_full_mv_num;
       }
-    } else {
-      nb_full_mvs[i].as_int = INVALID_MV;
     }
   }
+  return nb_full_mv_num;
 }
 #endif  // CONFIG_NON_GREEDY_MV
 
@@ -2585,26 +2436,32 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
   return best_sad;
 }
 
+static int get_exhaustive_threshold(int exhaustive_searches_thresh,
+                                    BLOCK_SIZE bsize) {
+  return exhaustive_searches_thresh >>
+         (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
+}
+
 #if CONFIG_NON_GREEDY_MV
 // Runs sequence of diamond searches in smaller steps for RD.
 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
               point as the best match, we will do a final 1-away diamond
               refining search  */
-double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
-                                  MV *mvp_full, int step_param, int lambda,
-                                  int do_refine,
-                                  const vp9_variance_fn_ptr_t *fn_ptr,
-                                  const int_mv *nb_full_mvs, int full_mv_num,
-                                  MV *best_mv) {
+int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
+                               BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+                               int lambda, int do_refine,
+                               const int_mv *nb_full_mvs, int full_mv_num,
+                               MV *best_mv) {
+  const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
+  const SPEED_FEATURES *const sf = &cpi->sf;
   int n, num00 = 0;
-  double thissme;
-  double bestsme;
+  int thissme;
+  int bestsme;
   const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param;
   const MV center_mv = { 0, 0 };
   vpx_clear_system_state();
-  bestsme =
-      diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param,
-                             lambda, &n, fn_ptr, nb_full_mvs, full_mv_num);
+  diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, lambda,
+                         &n, fn_ptr, nb_full_mvs, full_mv_num);
 
   bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
 
@@ -2618,9 +2475,9 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
       num00--;
     } else {
       MV temp_mv;
-      thissme = diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
-                                       step_param + n, lambda, &num00, fn_ptr,
-                                       nb_full_mvs, full_mv_num);
+      diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
+                             step_param + n, lambda, &num00, fn_ptr,
+                             nb_full_mvs, full_mv_num);
       thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
       // check to see if refining search is needed.
       if (num00 > further_steps - n) do_refine = 0;
@@ -2636,8 +2493,8 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
   if (do_refine) {
     const int search_range = 8;
     MV temp_mv = *best_mv;
-    thissme = vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range,
-                                          fn_ptr, nb_full_mvs, full_mv_num);
+    vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range, fn_ptr,
+                                nb_full_mvs, full_mv_num);
     thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
     if (thissme < bestsme) {
       bestsme = thissme;
@@ -2645,8 +2502,16 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
     }
   }
 
-  bestsme = (double)full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv,
-                                              lambda, nb_full_mvs, full_mv_num);
+  if (sf->exhaustive_searches_thresh < INT_MAX &&
+      !cpi->rc.is_src_frame_alt_ref) {
+    const int64_t exhaustive_thr =
+        get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
+    if (bestsme > exhaustive_thr) {
+      full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda,
+                                nb_full_mvs, full_mv_num);
+      bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
+    }
+  }
   return bestsme;
 }
 #endif  // CONFIG_NON_GREEDY_MV
@@ -2774,24 +2639,25 @@ static int full_pixel_exhaustive(const VP9_COMP *const cpi,
 }
 
 #if CONFIG_NON_GREEDY_MV
-double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
-                                   int lambda, int search_range,
-                                   const vp9_variance_fn_ptr_t *fn_ptr,
-                                   const int_mv *nb_full_mvs, int full_mv_num) {
+int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
+                                    int lambda, int search_range,
+                                    const vp9_variance_fn_ptr_t *fn_ptr,
+                                    const int_mv *nb_full_mvs,
+                                    int full_mv_num) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
   const struct buf_2d *const what = &x->plane[0].src;
   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv);
-  double best_sad;
+  int64_t best_sad;
   int i, j;
   vpx_clear_system_state();
   {
-    const double mv_dist =
-        fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride);
-    const double mv_cost =
-        vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num) /
-        (double)(1 << LOG2_PRECISION);
+    const int64_t mv_dist = (int64_t)fn_ptr->sdf(what->buf, what->stride,
+                                                 best_address, in_what->stride)
+                            << LOG2_PRECISION;
+    const int64_t mv_cost =
+        vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
     best_sad = mv_dist + lambda * mv_cost;
   }
 
@@ -2813,11 +2679,10 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
       for (j = 0; j < 4; ++j) {
         const MV mv = { best_full_mv->row + neighbors[j].row,
                         best_full_mv->col + neighbors[j].col };
-        const double mv_dist = sads[j];
-        const double mv_cost =
-            vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num) /
-            (double)(1 << LOG2_PRECISION);
-        const double thissad = mv_dist + lambda * mv_cost;
+        const int64_t mv_dist = (int64_t)sads[j] << LOG2_PRECISION;
+        const int64_t mv_cost =
+            vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+        const int64_t thissad = mv_dist + lambda * mv_cost;
         if (thissad < best_sad) {
           best_sad = thissad;
           best_site = j;
@@ -2829,13 +2694,14 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
                         best_full_mv->col + neighbors[j].col };
 
         if (is_mv_in(&x->mv_limits, &mv)) {
-          const double mv_dist =
-              fn_ptr->sdf(what->buf, what->stride,
-                          get_buf_from_mv(in_what, &mv), in_what->stride);
-          const double mv_cost =
-              vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num) /
-              (double)(1 << LOG2_PRECISION);
-          const double thissad = mv_dist + lambda * mv_cost;
+          const int64_t mv_dist =
+              (int64_t)fn_ptr->sdf(what->buf, what->stride,
+                                   get_buf_from_mv(in_what, &mv),
+                                   in_what->stride)
+              << LOG2_PRECISION;
+          const int64_t mv_cost =
+              vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+          const int64_t thissad = mv_dist + lambda * mv_cost;
           if (thissad < best_sad) {
             best_sad = thissad;
             best_site = j;
@@ -3034,9 +2900,10 @@ int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x,
     if (sf->exhaustive_searches_thresh < INT_MAX &&
         !cpi->rc.is_src_frame_alt_ref) {
       const int64_t exhaustive_thr =
-          sf->exhaustive_searches_thresh >>
-          (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
-      if (var > exhaustive_thr) run_exhaustive_search = 1;
+          get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
+      if (var > exhaustive_thr) {
+        run_exhaustive_search = 1;
+      }
     }
   } else if (method == MESH) {
     run_exhaustive_search = 1;
diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h
index cafa2d150..0c4d8f23c 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/libvpx/vp9/encoder/vp9_mcomp.h
@@ -12,6 +12,9 @@
 #define VPX_VP9_ENCODER_VP9_MCOMP_H_
 
 #include "vp9/encoder/vp9_block.h"
+#if CONFIG_NON_GREEDY_MV
+#include "vp9/encoder/vp9_non_greedy_mv.h"
+#endif  // CONFIG_NON_GREEDY_MV
 #include "vpx_dsp/variance.h"
 
 #ifdef __cplusplus
@@ -126,22 +129,18 @@ void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
                                     const MV *ref_mv);
 
 #if CONFIG_NON_GREEDY_MV
-#define NB_MVS_NUM 4
 struct TplDepStats;
-double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
-                                   int lambda, int search_range,
-                                   const vp9_variance_fn_ptr_t *fn_ptr,
-                                   const int_mv *nb_full_mvs, int full_mv_num);
-
-double vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
-                                  MV *mvp_full, int step_param, int lambda,
-                                  int do_refine,
-                                  const vp9_variance_fn_ptr_t *fn_ptr,
-                                  const int_mv *nb_full_mvs, int full_mv_num,
-                                  MV *best_mv);
-
-int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
-                                 int mv_num);
+int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
+                                    int lambda, int search_range,
+                                    const vp9_variance_fn_ptr_t *fn_ptr,
+                                    const int_mv *nb_full_mvs, int full_mv_num);
+
+int vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
+                               BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+                               int lambda, int do_refine,
+                               const int_mv *nb_full_mvs, int full_mv_num,
+                               MV *best_mv);
+
 static INLINE MV get_full_mv(const MV *mv) {
   MV out_mv;
   out_mv.row = mv->row >> 3;
@@ -149,9 +148,8 @@ static INLINE MV get_full_mv(const MV *mv) {
   return out_mv;
 }
 struct TplDepFrame;
-void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,
-                             int mi_col, int rf_idx, BLOCK_SIZE bsize,
-                             int_mv *nb_full_mvs);
+int vp9_prepare_nb_full_mvs(const struct MotionField *motion_field, int mi_row,
+                            int mi_col, int_mv *nb_full_mvs);
 
 static INLINE BLOCK_SIZE get_square_block_size(BLOCK_SIZE bsize) {
   BLOCK_SIZE square_bsize;
diff --git a/libvpx/vp9/encoder/vp9_non_greedy_mv.c b/libvpx/vp9/encoder/vp9_non_greedy_mv.c
new file mode 100644
index 000000000..4679d6c49
--- /dev/null
+++ b/libvpx/vp9/encoder/vp9_non_greedy_mv.c
@@ -0,0 +1,533 @@
+/*
+ *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_mv.h"
+#include "vp9/encoder/vp9_non_greedy_mv.h"
+// TODO(angiebird): move non_greedy_mv related functions to this file
+
+#define LOG2_TABLE_SIZE 1024
+static const int log2_table[LOG2_TABLE_SIZE] = {
+  0,  // This is a dummy value
+  0,        1048576,  1661954,  2097152,  2434718,  2710530,  2943725,
+  3145728,  3323907,  3483294,  3627477,  3759106,  3880192,  3992301,
+  4096672,  4194304,  4286015,  4372483,  4454275,  4531870,  4605679,
+  4676053,  4743299,  4807682,  4869436,  4928768,  4985861,  5040877,
+  5093962,  5145248,  5194851,  5242880,  5289431,  5334591,  5378443,
+  5421059,  5462508,  5502851,  5542146,  5580446,  5617800,  5654255,
+  5689851,  5724629,  5758625,  5791875,  5824409,  5856258,  5887450,
+  5918012,  5947969,  5977344,  6006160,  6034437,  6062195,  6089453,
+  6116228,  6142538,  6168398,  6193824,  6218829,  6243427,  6267632,
+  6291456,  6314910,  6338007,  6360756,  6383167,  6405252,  6427019,
+  6448477,  6469635,  6490501,  6511084,  6531390,  6551427,  6571202,
+  6590722,  6609993,  6629022,  6647815,  6666376,  6684713,  6702831,
+  6720734,  6738427,  6755916,  6773205,  6790299,  6807201,  6823917,
+  6840451,  6856805,  6872985,  6888993,  6904834,  6920510,  6936026,
+  6951384,  6966588,  6981641,  6996545,  7011304,  7025920,  7040397,
+  7054736,  7068940,  7083013,  7096956,  7110771,  7124461,  7138029,
+  7151476,  7164804,  7178017,  7191114,  7204100,  7216974,  7229740,
+  7242400,  7254954,  7267405,  7279754,  7292003,  7304154,  7316208,
+  7328167,  7340032,  7351805,  7363486,  7375079,  7386583,  7398000,
+  7409332,  7420579,  7431743,  7442826,  7453828,  7464751,  7475595,
+  7486362,  7497053,  7507669,  7518211,  7528680,  7539077,  7549404,
+  7559660,  7569847,  7579966,  7590017,  7600003,  7609923,  7619778,
+  7629569,  7639298,  7648964,  7658569,  7668114,  7677598,  7687023,
+  7696391,  7705700,  7714952,  7724149,  7733289,  7742375,  7751407,
+  7760385,  7769310,  7778182,  7787003,  7795773,  7804492,  7813161,
+  7821781,  7830352,  7838875,  7847350,  7855777,  7864158,  7872493,
+  7880782,  7889027,  7897226,  7905381,  7913492,  7921561,  7929586,
+  7937569,  7945510,  7953410,  7961268,  7969086,  7976864,  7984602,
+  7992301,  7999960,  8007581,  8015164,  8022709,  8030217,  8037687,
+  8045121,  8052519,  8059880,  8067206,  8074496,  8081752,  8088973,
+  8096159,  8103312,  8110431,  8117516,  8124569,  8131589,  8138576,
+  8145532,  8152455,  8159347,  8166208,  8173037,  8179836,  8186605,
+  8193343,  8200052,  8206731,  8213380,  8220001,  8226593,  8233156,
+  8239690,  8246197,  8252676,  8259127,  8265550,  8271947,  8278316,
+  8284659,  8290976,  8297266,  8303530,  8309768,  8315981,  8322168,
+  8328330,  8334467,  8340579,  8346667,  8352730,  8358769,  8364784,
+  8370775,  8376743,  8382687,  8388608,  8394506,  8400381,  8406233,
+  8412062,  8417870,  8423655,  8429418,  8435159,  8440878,  8446576,
+  8452252,  8457908,  8463542,  8469155,  8474748,  8480319,  8485871,
+  8491402,  8496913,  8502404,  8507875,  8513327,  8518759,  8524171,
+  8529564,  8534938,  8540293,  8545629,  8550947,  8556245,  8561525,
+  8566787,  8572031,  8577256,  8582464,  8587653,  8592825,  8597980,
+  8603116,  8608236,  8613338,  8618423,  8623491,  8628542,  8633576,
+  8638593,  8643594,  8648579,  8653547,  8658499,  8663434,  8668354,
+  8673258,  8678145,  8683017,  8687874,  8692715,  8697540,  8702350,
+  8707145,  8711925,  8716690,  8721439,  8726174,  8730894,  8735599,
+  8740290,  8744967,  8749628,  8754276,  8758909,  8763528,  8768134,
+  8772725,  8777302,  8781865,  8786415,  8790951,  8795474,  8799983,
+  8804478,  8808961,  8813430,  8817886,  8822328,  8826758,  8831175,
+  8835579,  8839970,  8844349,  8848715,  8853068,  8857409,  8861737,
+  8866053,  8870357,  8874649,  8878928,  8883195,  8887451,  8891694,
+  8895926,  8900145,  8904353,  8908550,  8912734,  8916908,  8921069,
+  8925220,  8929358,  8933486,  8937603,  8941708,  8945802,  8949885,
+  8953957,  8958018,  8962068,  8966108,  8970137,  8974155,  8978162,
+  8982159,  8986145,  8990121,  8994086,  8998041,  9001986,  9005920,
+  9009844,  9013758,  9017662,  9021556,  9025440,  9029314,  9033178,
+  9037032,  9040877,  9044711,  9048536,  9052352,  9056157,  9059953,
+  9063740,  9067517,  9071285,  9075044,  9078793,  9082533,  9086263,
+  9089985,  9093697,  9097400,  9101095,  9104780,  9108456,  9112123,
+  9115782,  9119431,  9123072,  9126704,  9130328,  9133943,  9137549,
+  9141146,  9144735,  9148316,  9151888,  9155452,  9159007,  9162554,
+  9166092,  9169623,  9173145,  9176659,  9180165,  9183663,  9187152,
+  9190634,  9194108,  9197573,  9201031,  9204481,  9207923,  9211357,
+  9214784,  9218202,  9221613,  9225017,  9228412,  9231800,  9235181,
+  9238554,  9241919,  9245277,  9248628,  9251971,  9255307,  9258635,
+  9261956,  9265270,  9268577,  9271876,  9275169,  9278454,  9281732,
+  9285002,  9288266,  9291523,  9294773,  9298016,  9301252,  9304481,
+  9307703,  9310918,  9314126,  9317328,  9320523,  9323711,  9326892,
+  9330067,  9333235,  9336397,  9339552,  9342700,  9345842,  9348977,
+  9352106,  9355228,  9358344,  9361454,  9364557,  9367654,  9370744,
+  9373828,  9376906,  9379978,  9383043,  9386102,  9389155,  9392202,
+  9395243,  9398278,  9401306,  9404329,  9407345,  9410356,  9413360,
+  9416359,  9419351,  9422338,  9425319,  9428294,  9431263,  9434226,
+  9437184,  9440136,  9443082,  9446022,  9448957,  9451886,  9454809,
+  9457726,  9460638,  9463545,  9466446,  9469341,  9472231,  9475115,
+  9477994,  9480867,  9483735,  9486597,  9489454,  9492306,  9495152,
+  9497993,  9500828,  9503659,  9506484,  9509303,  9512118,  9514927,
+  9517731,  9520530,  9523324,  9526112,  9528895,  9531674,  9534447,
+  9537215,  9539978,  9542736,  9545489,  9548237,  9550980,  9553718,
+  9556451,  9559179,  9561903,  9564621,  9567335,  9570043,  9572747,
+  9575446,  9578140,  9580830,  9583514,  9586194,  9588869,  9591540,
+  9594205,  9596866,  9599523,  9602174,  9604821,  9607464,  9610101,
+  9612735,  9615363,  9617987,  9620607,  9623222,  9625832,  9628438,
+  9631040,  9633637,  9636229,  9638818,  9641401,  9643981,  9646556,
+  9649126,  9651692,  9654254,  9656812,  9659365,  9661914,  9664459,
+  9666999,  9669535,  9672067,  9674594,  9677118,  9679637,  9682152,
+  9684663,  9687169,  9689672,  9692170,  9694665,  9697155,  9699641,
+  9702123,  9704601,  9707075,  9709545,  9712010,  9714472,  9716930,
+  9719384,  9721834,  9724279,  9726721,  9729159,  9731593,  9734024,
+  9736450,  9738872,  9741291,  9743705,  9746116,  9748523,  9750926,
+  9753326,  9755721,  9758113,  9760501,  9762885,  9765266,  9767642,
+  9770015,  9772385,  9774750,  9777112,  9779470,  9781825,  9784175,
+  9786523,  9788866,  9791206,  9793543,  9795875,  9798204,  9800530,
+  9802852,  9805170,  9807485,  9809797,  9812104,  9814409,  9816710,
+  9819007,  9821301,  9823591,  9825878,  9828161,  9830441,  9832718,
+  9834991,  9837261,  9839527,  9841790,  9844050,  9846306,  9848559,
+  9850808,  9853054,  9855297,  9857537,  9859773,  9862006,  9864235,
+  9866462,  9868685,  9870904,  9873121,  9875334,  9877544,  9879751,
+  9881955,  9884155,  9886352,  9888546,  9890737,  9892925,  9895109,
+  9897291,  9899469,  9901644,  9903816,  9905985,  9908150,  9910313,
+  9912473,  9914629,  9916783,  9918933,  9921080,  9923225,  9925366,
+  9927504,  9929639,  9931771,  9933900,  9936027,  9938150,  9940270,
+  9942387,  9944502,  9946613,  9948721,  9950827,  9952929,  9955029,
+  9957126,  9959219,  9961310,  9963398,  9965484,  9967566,  9969645,
+  9971722,  9973796,  9975866,  9977934,  9980000,  9982062,  9984122,
+  9986179,  9988233,  9990284,  9992332,  9994378,  9996421,  9998461,
+  10000498, 10002533, 10004565, 10006594, 10008621, 10010644, 10012665,
+  10014684, 10016700, 10018713, 10020723, 10022731, 10024736, 10026738,
+  10028738, 10030735, 10032729, 10034721, 10036710, 10038697, 10040681,
+  10042662, 10044641, 10046617, 10048591, 10050562, 10052530, 10054496,
+  10056459, 10058420, 10060379, 10062334, 10064287, 10066238, 10068186,
+  10070132, 10072075, 10074016, 10075954, 10077890, 10079823, 10081754,
+  10083682, 10085608, 10087532, 10089453, 10091371, 10093287, 10095201,
+  10097112, 10099021, 10100928, 10102832, 10104733, 10106633, 10108529,
+  10110424, 10112316, 10114206, 10116093, 10117978, 10119861, 10121742,
+  10123620, 10125495, 10127369, 10129240, 10131109, 10132975, 10134839,
+  10136701, 10138561, 10140418, 10142273, 10144126, 10145976, 10147825,
+  10149671, 10151514, 10153356, 10155195, 10157032, 10158867, 10160699,
+  10162530, 10164358, 10166184, 10168007, 10169829, 10171648, 10173465,
+  10175280, 10177093, 10178904, 10180712, 10182519, 10184323, 10186125,
+  10187925, 10189722, 10191518, 10193311, 10195103, 10196892, 10198679,
+  10200464, 10202247, 10204028, 10205806, 10207583, 10209357, 10211130,
+  10212900, 10214668, 10216435, 10218199, 10219961, 10221721, 10223479,
+  10225235, 10226989, 10228741, 10230491, 10232239, 10233985, 10235728,
+  10237470, 10239210, 10240948, 10242684, 10244417, 10246149, 10247879,
+  10249607, 10251333, 10253057, 10254779, 10256499, 10258217, 10259933,
+  10261647, 10263360, 10265070, 10266778, 10268485, 10270189, 10271892,
+  10273593, 10275292, 10276988, 10278683, 10280376, 10282068, 10283757,
+  10285444, 10287130, 10288814, 10290495, 10292175, 10293853, 10295530,
+  10297204, 10298876, 10300547, 10302216, 10303883, 10305548, 10307211,
+  10308873, 10310532, 10312190, 10313846, 10315501, 10317153, 10318804,
+  10320452, 10322099, 10323745, 10325388, 10327030, 10328670, 10330308,
+  10331944, 10333578, 10335211, 10336842, 10338472, 10340099, 10341725,
+  10343349, 10344971, 10346592, 10348210, 10349828, 10351443, 10353057,
+  10354668, 10356279, 10357887, 10359494, 10361099, 10362702, 10364304,
+  10365904, 10367502, 10369099, 10370694, 10372287, 10373879, 10375468,
+  10377057, 10378643, 10380228, 10381811, 10383393, 10384973, 10386551,
+  10388128, 10389703, 10391276, 10392848, 10394418, 10395986, 10397553,
+  10399118, 10400682, 10402244, 10403804, 10405363, 10406920, 10408476,
+  10410030, 10411582, 10413133, 10414682, 10416230, 10417776, 10419320,
+  10420863, 10422404, 10423944, 10425482, 10427019, 10428554, 10430087,
+  10431619, 10433149, 10434678, 10436206, 10437731, 10439256, 10440778,
+  10442299, 10443819, 10445337, 10446854, 10448369, 10449882, 10451394,
+  10452905, 10454414, 10455921, 10457427, 10458932, 10460435, 10461936,
+  10463436, 10464935, 10466432, 10467927, 10469422, 10470914, 10472405,
+  10473895, 10475383, 10476870, 10478355, 10479839, 10481322, 10482802,
+  10484282,
+};
+
+static int mi_size_to_block_size(int mi_bsize, int mi_num) {
+  return (mi_num % mi_bsize) ? mi_num / mi_bsize + 1 : mi_num / mi_bsize;
+}
+
+Status vp9_alloc_motion_field_info(MotionFieldInfo *motion_field_info,
+                                   int frame_num, int mi_rows, int mi_cols) {
+  int frame_idx, rf_idx, square_block_idx;
+  if (motion_field_info->allocated) {
+    // TODO(angiebird): Avoid re-allocate buffer if possible
+    vp9_free_motion_field_info(motion_field_info);
+  }
+  motion_field_info->frame_num = frame_num;
+  motion_field_info->motion_field_array =
+      vpx_calloc(frame_num, sizeof(*motion_field_info->motion_field_array));
+  for (frame_idx = 0; frame_idx < frame_num; ++frame_idx) {
+    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
+      for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
+           ++square_block_idx) {
+        BLOCK_SIZE bsize = square_block_idx_to_bsize(square_block_idx);
+        const int mi_height = num_8x8_blocks_high_lookup[bsize];
+        const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+        const int block_rows = mi_size_to_block_size(mi_height, mi_rows);
+        const int block_cols = mi_size_to_block_size(mi_width, mi_cols);
+        MotionField *motion_field =
+            &motion_field_info
+                 ->motion_field_array[frame_idx][rf_idx][square_block_idx];
+        Status status =
+            vp9_alloc_motion_field(motion_field, bsize, block_rows, block_cols);
+        if (status == STATUS_FAILED) {
+          return STATUS_FAILED;
+        }
+      }
+    }
+  }
+  motion_field_info->allocated = 1;
+  return STATUS_OK;
+}
+
+Status vp9_alloc_motion_field(MotionField *motion_field, BLOCK_SIZE bsize,
+                              int block_rows, int block_cols) {
+  Status status = STATUS_OK;
+  motion_field->ready = 0;
+  motion_field->bsize = bsize;
+  motion_field->block_rows = block_rows;
+  motion_field->block_cols = block_cols;
+  motion_field->block_num = block_rows * block_cols;
+  motion_field->mf =
+      vpx_calloc(motion_field->block_num, sizeof(*motion_field->mf));
+  if (motion_field->mf == NULL) {
+    status = STATUS_FAILED;
+  }
+  motion_field->set_mv =
+      vpx_calloc(motion_field->block_num, sizeof(*motion_field->set_mv));
+  if (motion_field->set_mv == NULL) {
+    vpx_free(motion_field->mf);
+    motion_field->mf = NULL;
+    status = STATUS_FAILED;
+  }
+  motion_field->local_structure = vpx_calloc(
+      motion_field->block_num, sizeof(*motion_field->local_structure));
+  if (motion_field->local_structure == NULL) {
+    vpx_free(motion_field->mf);
+    motion_field->mf = NULL;
+    vpx_free(motion_field->set_mv);
+    motion_field->set_mv = NULL;
+    status = STATUS_FAILED;
+  }
+  return status;
+}
+
+void vp9_free_motion_field(MotionField *motion_field) {
+  vpx_free(motion_field->mf);
+  vpx_free(motion_field->set_mv);
+  vpx_free(motion_field->local_structure);
+  vp9_zero(*motion_field);
+}
+
+void vp9_free_motion_field_info(MotionFieldInfo *motion_field_info) {
+  if (motion_field_info->allocated) {
+    int frame_idx, rf_idx, square_block_idx;
+    for (frame_idx = 0; frame_idx < motion_field_info->frame_num; ++frame_idx) {
+      for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
+        for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
+             ++square_block_idx) {
+          MotionField *motion_field =
+              &motion_field_info
+                   ->motion_field_array[frame_idx][rf_idx][square_block_idx];
+          vp9_free_motion_field(motion_field);
+        }
+      }
+    }
+    vpx_free(motion_field_info->motion_field_array);
+    motion_field_info->motion_field_array = NULL;
+    motion_field_info->frame_num = 0;
+    motion_field_info->allocated = 0;
+  }
+}
+
+MotionField *vp9_motion_field_info_get_motion_field(
+    MotionFieldInfo *motion_field_info, int frame_idx, int rf_idx,
+    BLOCK_SIZE bsize) {
+  int square_block_idx = get_square_block_idx(bsize);
+  assert(frame_idx < motion_field_info->frame_num);
+  assert(motion_field_info->allocated == 1);
+  return &motion_field_info
+              ->motion_field_array[frame_idx][rf_idx][square_block_idx];
+}
+
+int vp9_motion_field_is_mv_set(const MotionField *motion_field, int brow,
+                               int bcol) {
+  assert(brow >= 0 && brow < motion_field->block_rows);
+  assert(bcol >= 0 && bcol < motion_field->block_cols);
+  return motion_field->set_mv[brow * motion_field->block_cols + bcol];
+}
+
+int_mv vp9_motion_field_get_mv(const MotionField *motion_field, int brow,
+                               int bcol) {
+  assert(brow >= 0 && brow < motion_field->block_rows);
+  assert(bcol >= 0 && bcol < motion_field->block_cols);
+  return motion_field->mf[brow * motion_field->block_cols + bcol];
+}
+
+int_mv vp9_motion_field_mi_get_mv(const MotionField *motion_field, int mi_row,
+                                  int mi_col) {
+  const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize];
+  const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize];
+  const int brow = mi_row / mi_height;
+  const int bcol = mi_col / mi_width;
+  assert(mi_row % mi_height == 0);
+  assert(mi_col % mi_width == 0);
+  return vp9_motion_field_get_mv(motion_field, brow, bcol);
+}
+
+void vp9_motion_field_mi_set_mv(MotionField *motion_field, int mi_row,
+                                int mi_col, int_mv mv) {
+  const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize];
+  const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize];
+  const int brow = mi_row / mi_height;
+  const int bcol = mi_col / mi_width;
+  assert(mi_row % mi_height == 0);
+  assert(mi_col % mi_width == 0);
+  assert(brow >= 0 && brow < motion_field->block_rows);
+  assert(bcol >= 0 && bcol < motion_field->block_cols);
+  motion_field->mf[brow * motion_field->block_cols + bcol] = mv;
+  motion_field->set_mv[brow * motion_field->block_cols + bcol] = 1;
+}
+
+void vp9_motion_field_reset_mvs(MotionField *motion_field) {
+  memset(motion_field->set_mv, 0,
+         motion_field->block_num * sizeof(*motion_field->set_mv));
+}
+
+static int64_t log2_approximation(int64_t v) {
+  assert(v > 0);
+  if (v < LOG2_TABLE_SIZE) {
+    return log2_table[v];
+  } else {
+    // use linear approximation when v >= 2^10
+    const int slope =
+        1477;  // slope = 1 / (log(2) * 1024) * (1 << LOG2_PRECISION)
+    assert(LOG2_TABLE_SIZE == 1 << 10);
+
+    return slope * (v - LOG2_TABLE_SIZE) + (10 << LOG2_PRECISION);
+  }
+}
+
+int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs,
+                                 int mv_num) {
+  // The behavior of this function is to compute log2 of mv difference,
+  // i.e. min log2(1 + row_diff * row_diff + col_diff * col_diff)
+  // against available neighbor mvs.
+  // Since the log2 is monotonically increasing, we can compute
+  // min row_diff * row_diff + col_diff * col_diff first
+  // then apply log2 in the end.
+  int i;
+  int64_t min_abs_diff = INT64_MAX;
+  int cnt = 0;
+  assert(mv_num <= NB_MVS_NUM);
+  for (i = 0; i < mv_num; ++i) {
+    MV nb_mv = nb_full_mvs[i].as_mv;
+    const int64_t row_diff = abs(mv->row - nb_mv.row);
+    const int64_t col_diff = abs(mv->col - nb_mv.col);
+    const int64_t abs_diff = row_diff * row_diff + col_diff * col_diff;
+    assert(nb_full_mvs[i].as_int != INVALID_MV);
+    min_abs_diff = VPXMIN(abs_diff, min_abs_diff);
+    ++cnt;
+  }
+  if (cnt) {
+    return log2_approximation(1 + min_abs_diff);
+  }
+  return 0;
+}
+
+static FloatMV get_smooth_motion_vector(const FloatMV scaled_search_mv,
+                                        const FloatMV *tmp_mf,
+                                        const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
+                                        int rows, int cols, int row, int col,
+                                        float alpha) {
+  const FloatMV tmp_mv = tmp_mf[row * cols + col];
+  int idx_row, idx_col;
+  FloatMV avg_nb_mv = { 0.0f, 0.0f };
+  FloatMV mv = { 0.0f, 0.0f };
+  float filter[3][3] = { { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f },
+                         { 1.0f / 6.0f, 0.0f, 1.0f / 6.0f },
+                         { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f } };
+  for (idx_row = 0; idx_row < 3; ++idx_row) {
+    int nb_row = row + idx_row - 1;
+    for (idx_col = 0; idx_col < 3; ++idx_col) {
+      int nb_col = col + idx_col - 1;
+      if (nb_row < 0 || nb_col < 0 || nb_row >= rows || nb_col >= cols) {
+        avg_nb_mv.row += (tmp_mv.row) * filter[idx_row][idx_col];
+        avg_nb_mv.col += (tmp_mv.col) * filter[idx_row][idx_col];
+      } else {
+        const FloatMV nb_mv = tmp_mf[nb_row * cols + nb_col];
+        avg_nb_mv.row += (nb_mv.row) * filter[idx_row][idx_col];
+        avg_nb_mv.col += (nb_mv.col) * filter[idx_row][idx_col];
+      }
+    }
+  }
+  {
+    // M is the local variance of reference frame
+    float M00 = M[row * cols + col][0];
+    float M01 = M[row * cols + col][1];
+    float M10 = M[row * cols + col][2];
+    float M11 = M[row * cols + col][3];
+
+    float det = (M00 + alpha) * (M11 + alpha) - M01 * M10;
+
+    float inv_M00 = (M11 + alpha) / det;
+    float inv_M01 = -M01 / det;
+    float inv_M10 = -M10 / det;
+    float inv_M11 = (M00 + alpha) / det;
+
+    float inv_MM00 = inv_M00 * M00 + inv_M01 * M10;
+    float inv_MM01 = inv_M00 * M01 + inv_M01 * M11;
+    float inv_MM10 = inv_M10 * M00 + inv_M11 * M10;
+    float inv_MM11 = inv_M10 * M01 + inv_M11 * M11;
+
+    mv.row = inv_M00 * avg_nb_mv.row * alpha + inv_M01 * avg_nb_mv.col * alpha +
+             inv_MM00 * scaled_search_mv.row + inv_MM01 * scaled_search_mv.col;
+    mv.col = inv_M10 * avg_nb_mv.row * alpha + inv_M11 * avg_nb_mv.col * alpha +
+             inv_MM10 * scaled_search_mv.row + inv_MM11 * scaled_search_mv.col;
+  }
+  return mv;
+}
+
+void vp9_get_smooth_motion_field(const MV *search_mf,
+                                 const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
+                                 int rows, int cols, BLOCK_SIZE bsize,
+                                 float alpha, int num_iters, MV *smooth_mf) {
+  // M is the local variation of reference frame
+  // build two buffers
+  FloatMV *input = (FloatMV *)malloc(rows * cols * sizeof(FloatMV));
+  FloatMV *output = (FloatMV *)malloc(rows * cols * sizeof(FloatMV));
+  int idx;
+  int row, col;
+  int bw = 4 << b_width_log2_lookup[bsize];
+  int bh = 4 << b_height_log2_lookup[bsize];
+  // copy search results to input buffer
+  for (idx = 0; idx < rows * cols; ++idx) {
+    input[idx].row = (float)search_mf[idx].row / bh;
+    input[idx].col = (float)search_mf[idx].col / bw;
+  }
+  for (idx = 0; idx < num_iters; ++idx) {
+    FloatMV *tmp;
+    for (row = 0; row < rows; ++row) {
+      for (col = 0; col < cols; ++col) {
+        // note: the scaled_search_mf and smooth_mf are all scaled by macroblock
+        // size
+        const MV search_mv = search_mf[row * cols + col];
+        FloatMV scaled_search_mv = { (float)search_mv.row / bh,
+                                     (float)search_mv.col / bw };
+        output[row * cols + col] = get_smooth_motion_vector(
+            scaled_search_mv, input, M, rows, cols, row, col, alpha);
+      }
+    }
+    // swap buffers
+    tmp = input;
+    input = output;
+    output = tmp;
+  }
+  // copy smoothed results to output
+  for (idx = 0; idx < rows * cols; ++idx) {
+    smooth_mf[idx].row = (int)(input[idx].row * bh);
+    smooth_mf[idx].col = (int)(input[idx].col * bw);
+  }
+  free(input);
+  free(output);
+}
+
+void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame,
+                             const YV12_BUFFER_CONFIG *ref_frame,
+                             const MV *search_mf,
+                             const vp9_variance_fn_ptr_t *fn_ptr, int rows,
+                             int cols, BLOCK_SIZE bsize,
+                             int (*M)[MF_LOCAL_STRUCTURE_SIZE]) {
+  const int bw = 4 << b_width_log2_lookup[bsize];
+  const int bh = 4 << b_height_log2_lookup[bsize];
+  const int cur_stride = cur_frame->y_stride;
+  const int ref_stride = ref_frame->y_stride;
+  const int width = ref_frame->y_width;
+  const int height = ref_frame->y_height;
+  int row, col;
+  for (row = 0; row < rows; ++row) {
+    for (col = 0; col < cols; ++col) {
+      int cur_offset = row * bh * cur_stride + col * bw;
+      uint8_t *center = cur_frame->y_buffer + cur_offset;
+      int ref_h = row * bh + search_mf[row * cols + col].row;
+      int ref_w = col * bw + search_mf[row * cols + col].col;
+      int ref_offset;
+      uint8_t *target;
+      uint8_t *nb;
+      int search_dist;
+      int nb_dist;
+      int I_row = 0, I_col = 0;
+      // TODO(Dan): handle the case that when reference frame block beyond the
+      // boundary
+      ref_h = ref_h < 0 ? 0 : (ref_h >= height - bh ? height - bh - 1 : ref_h);
+      ref_w = ref_w < 0 ? 0 : (ref_w >= width - bw ? width - bw - 1 : ref_w);
+      // compute search results distortion
+      // TODO(Dan): maybe need to use vp9 function to find the reference block,
+      // to compare with the results of my python code, I first use my way to
+      // compute the reference block
+      ref_offset = ref_h * ref_stride + ref_w;
+      target = ref_frame->y_buffer + ref_offset;
+      search_dist = fn_ptr->sdf(center, cur_stride, target, ref_stride);
+      // compute target's neighbors' distortions
+      // TODO(Dan): if using padding, the boundary condition may vary
+      // up
+      if (ref_h - bh >= 0) {
+        nb = target - ref_stride * bh;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_row += nb_dist - search_dist;
+      }
+      // down
+      if (ref_h + bh < height - bh) {
+        nb = target + ref_stride * bh;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_row += nb_dist - search_dist;
+      }
+      if (ref_h - bh >= 0 && ref_h + bh < height - bh) {
+        I_row /= 2;
+      }
+      I_row /= (bw * bh);
+      // left
+      if (ref_w - bw >= 0) {
+        nb = target - bw;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_col += nb_dist - search_dist;
+      }
+      // down
+      if (ref_w + bw < width - bw) {
+        nb = target + bw;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_col += nb_dist - search_dist;
+      }
+      if (ref_w - bw >= 0 && ref_w + bw < width - bw) {
+        I_col /= 2;
+      }
+      I_col /= (bw * bh);
+      M[row * cols + col][0] = I_row * I_row;
+      M[row * cols + col][1] = I_row * I_col;
+      M[row * cols + col][2] = I_col * I_row;
+      M[row * cols + col][3] = I_col * I_col;
+    }
+  }
+}
diff --git a/libvpx/vp9/encoder/vp9_non_greedy_mv.h b/libvpx/vp9/encoder/vp9_non_greedy_mv.h
new file mode 100644
index 000000000..c2bd69722
--- /dev/null
+++ b/libvpx/vp9/encoder/vp9_non_greedy_mv.h
@@ -0,0 +1,129 @@
+/*
+ *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_
+#define VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_
+
+#include "vp9/common/vp9_enums.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vpx_scale/yv12config.h"
+#include "vpx_dsp/variance.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#define NB_MVS_NUM 4
+#define LOG2_PRECISION 20
+#define MF_LOCAL_STRUCTURE_SIZE 4
+#define SQUARE_BLOCK_SIZES 4
+
+typedef enum Status { STATUS_OK = 0, STATUS_FAILED = 1 } Status;
+
+typedef struct MotionField {
+  int ready;
+  BLOCK_SIZE bsize;
+  int block_rows;
+  int block_cols;
+  int block_num;  // block_num == block_rows * block_cols
+  int (*local_structure)[MF_LOCAL_STRUCTURE_SIZE];
+  int_mv *mf;
+  int *set_mv;
+  int mv_log_scale;
+} MotionField;
+
+typedef struct MotionFieldInfo {
+  int frame_num;
+  int allocated;
+  MotionField (*motion_field_array)[MAX_INTER_REF_FRAMES][SQUARE_BLOCK_SIZES];
+} MotionFieldInfo;
+
+typedef struct {
+  float row, col;
+} FloatMV;
+
+static INLINE int get_square_block_idx(BLOCK_SIZE bsize) {
+  if (bsize == BLOCK_4X4) {
+    return 0;
+  }
+  if (bsize == BLOCK_8X8) {
+    return 1;
+  }
+  if (bsize == BLOCK_16X16) {
+    return 2;
+  }
+  if (bsize == BLOCK_32X32) {
+    return 3;
+  }
+  assert(0 && "ERROR: non-square block size");
+  return -1;
+}
+
+static INLINE BLOCK_SIZE square_block_idx_to_bsize(int square_block_idx) {
+  if (square_block_idx == 0) {
+    return BLOCK_4X4;
+  }
+  if (square_block_idx == 1) {
+    return BLOCK_8X8;
+  }
+  if (square_block_idx == 2) {
+    return BLOCK_16X16;
+  }
+  if (square_block_idx == 3) {
+    return BLOCK_32X32;
+  }
+  assert(0 && "ERROR: invalid square_block_idx");
+  return BLOCK_INVALID;
+}
+
+Status vp9_alloc_motion_field_info(MotionFieldInfo *motion_field_info,
+                                   int frame_num, int mi_rows, int mi_cols);
+
+Status vp9_alloc_motion_field(MotionField *motion_field, BLOCK_SIZE bsize,
+                              int block_rows, int block_cols);
+
+void vp9_free_motion_field(MotionField *motion_field);
+
+void vp9_free_motion_field_info(MotionFieldInfo *motion_field_info);
+
+int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs,
+                                 int mv_num);
+
+void vp9_get_smooth_motion_field(const MV *search_mf,
+                                 const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
+                                 int rows, int cols, BLOCK_SIZE bize,
+                                 float alpha, int num_iters, MV *smooth_mf);
+
+void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame,
+                             const YV12_BUFFER_CONFIG *ref_frame,
+                             const MV *search_mf,
+                             const vp9_variance_fn_ptr_t *fn_ptr, int rows,
+                             int cols, BLOCK_SIZE bsize,
+                             int (*M)[MF_LOCAL_STRUCTURE_SIZE]);
+
+MotionField *vp9_motion_field_info_get_motion_field(
+    MotionFieldInfo *motion_field_info, int frame_idx, int rf_idx,
+    BLOCK_SIZE bsize);
+
+void vp9_motion_field_mi_set_mv(MotionField *motion_field, int mi_row,
+                                int mi_col, int_mv mv);
+
+void vp9_motion_field_reset_mvs(MotionField *motion_field);
+
+int_mv vp9_motion_field_get_mv(const MotionField *motion_field, int brow,
+                               int bcol);
+int_mv vp9_motion_field_mi_get_mv(const MotionField *motion_field, int mi_row,
+                                  int mi_col);
+int vp9_motion_field_is_mv_set(const MotionField *motion_field, int brow,
+                               int bcol);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+#endif  // VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_
diff --git a/libvpx/vp9/encoder/vp9_pickmode.c b/libvpx/vp9/encoder/vp9_pickmode.c
index 513b9f678..9b2e48505 100644
--- a/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/libvpx/vp9/encoder/vp9_pickmode.c
@@ -1501,7 +1501,8 @@ static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
   int best_early_term = 0;
   int best_flag_preduv_computed[2] = { 0 };
   INTERP_FILTER filter_start = force_smooth_filter ? EIGHTTAP_SMOOTH : EIGHTTAP;
-  for (filter = filter_start; filter <= EIGHTTAP_SMOOTH; ++filter) {
+  INTERP_FILTER filter_end = EIGHTTAP_SMOOTH;
+  for (filter = filter_start; filter <= filter_end; ++filter) {
     int64_t cost;
     mi->interp_filter = filter;
     vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
@@ -1531,9 +1532,11 @@ static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
           free_pred_buffer(*this_mode_pred);
           *this_mode_pred = current_pred;
         }
-        current_pred = &tmp[get_pred_buffer(tmp, 3)];
-        pd->dst.buf = current_pred->data;
-        pd->dst.stride = bw;
+        if (filter != filter_end) {
+          current_pred = &tmp[get_pred_buffer(tmp, 3)];
+          pd->dst.buf = current_pred->data;
+          pd->dst.stride = bw;
+        }
       }
     }
   }
@@ -1554,6 +1557,9 @@ static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
   if (reuse_inter_pred) {
     pd->dst.buf = (*this_mode_pred)->data;
     pd->dst.stride = (*this_mode_pred)->stride;
+  } else if (best_filter < filter_end) {
+    mi->interp_filter = best_filter;
+    vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
   }
 }
 
@@ -1713,9 +1719,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
   // process.
   // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
   PRED_BUFFER tmp[4];
-  DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]);
+  DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64] VPX_UNINITIALIZED);
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64] VPX_UNINITIALIZED);
 #endif
   struct buf_2d orig_dst = pd->dst;
   PRED_BUFFER *this_mode_pred = NULL;
@@ -2554,6 +2560,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
       if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize]))
         continue;
 
+      if (cpi->sf.rt_intra_dc_only_low_content && this_mode != DC_PRED &&
+          x->content_state_sb != kVeryHighSad)
+        continue;
+
       if ((cpi->sf.adaptive_rd_thresh_row_mt &&
            rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh,
                                       &rd_thresh_freq_fact[mode_index])) ||
diff --git a/libvpx/vp9/encoder/vp9_ratectrl.c b/libvpx/vp9/encoder/vp9_ratectrl.c
index 6745b0adf..cbafbf7b9 100644
--- a/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -436,7 +436,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
   rc->use_post_encode_drop = 0;
   rc->ext_use_post_encode_drop = 0;
   rc->arf_active_best_quality_adjustment_factor = 1.0;
-
+  rc->arf_increase_active_best_quality = 0;
   rc->preserve_arf_as_gld = 0;
   rc->preserve_next_arf_as_gld = 0;
   rc->show_arf_as_gld = 0;
@@ -504,7 +504,7 @@ static int check_buffer_below_thresh(VP9_COMP *cpi, int drop_mark) {
   }
 }
 
-static int drop_frame(VP9_COMP *cpi) {
+int vp9_test_drop(VP9_COMP *cpi) {
   const VP9EncoderConfig *oxcf = &cpi->oxcf;
   RATE_CONTROL *const rc = &cpi->rc;
   SVC *svc = &cpi->svc;
@@ -609,13 +609,15 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
   SVC *svc = &cpi->svc;
   int svc_prev_layer_dropped = 0;
   // In the constrained or full_superframe framedrop mode for svc
-  // (framedrop_mode !=  LAYER_DROP), if the previous spatial layer was
-  // dropped, drop the current spatial layer.
+  // (framedrop_mode != (LAYER_DROP && CONSTRAINED_FROM_ABOVE)),
+  // if the previous spatial layer was dropped, drop the current spatial layer.
   if (cpi->use_svc && svc->spatial_layer_id > 0 &&
       svc->drop_spatial_layer[svc->spatial_layer_id - 1])
     svc_prev_layer_dropped = 1;
-  if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP) ||
-      drop_frame(cpi)) {
+  if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP &&
+       svc->framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP) ||
+      svc->force_drop_constrained_from_above[svc->spatial_layer_id] ||
+      vp9_test_drop(cpi)) {
     vp9_rc_postencode_update_drop_frame(cpi);
     cpi->ext_refresh_frame_flags_pending = 0;
     cpi->last_frame_dropped = 1;
@@ -625,14 +627,17 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
       svc->drop_count[svc->spatial_layer_id]++;
       svc->skip_enhancement_layer = 1;
       if (svc->framedrop_mode == LAYER_DROP ||
+          (svc->framedrop_mode == CONSTRAINED_FROM_ABOVE_DROP &&
+           svc->force_drop_constrained_from_above[svc->number_spatial_layers -
+                                                  1] == 0) ||
           svc->drop_spatial_layer[0] == 0) {
-        // For the case of constrained drop mode where the base is dropped
-        // (drop_spatial_layer[0] == 1), which means full superframe dropped,
-        // we don't increment the svc frame counters. In particular temporal
-        // layer counter (which is incremented in vp9_inc_frame_in_layer())
-        // won't be incremented, so on a dropped frame we try the same
-        // temporal_layer_id on next incoming frame. This is to avoid an
-        // issue with temporal alignement with full superframe dropping.
+        // For the case of constrained drop mode where full superframe is
+        // dropped, we don't increment the svc frame counters.
+        // In particular temporal layer counter (which is incremented in
+        // vp9_inc_frame_in_layer()) won't be incremented, so on a dropped
+        // frame we try the same temporal_layer_id on next incoming frame.
+        // This is to avoid an issue with temporal alignement with full
+        // superframe dropping.
         vp9_inc_frame_in_layer(cpi);
       }
       if (svc->spatial_layer_id == svc->number_spatial_layers - 1) {
@@ -1420,8 +1425,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
   int active_worst_quality = cpi->twopass.active_worst_quality;
   int q;
   int *inter_minq;
-  int arf_active_best_quality_adjustment, arf_active_best_quality_max;
-  int *arfgf_high_motion_minq;
+  int arf_active_best_quality_hl;
+  int *arfgf_high_motion_minq, *arfgf_low_motion_minq;
   const int boost_frame =
       !rc->is_src_frame_alt_ref &&
       (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
@@ -1448,14 +1453,20 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
       if (q < cq_level) q = cq_level;
     }
     active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
+    arf_active_best_quality_hl = active_best_quality;
 
-    ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_high_motion_minq);
-    arf_active_best_quality_max = arfgf_high_motion_minq[q];
-    arf_active_best_quality_adjustment =
-        arf_active_best_quality_max - active_best_quality;
-    active_best_quality = arf_active_best_quality_max -
-                          (int)(arf_active_best_quality_adjustment *
-                                rc->arf_active_best_quality_adjustment_factor);
+    if (rc->arf_increase_active_best_quality == 1) {
+      ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_high_motion_minq);
+      arf_active_best_quality_hl = arfgf_high_motion_minq[q];
+    } else if (rc->arf_increase_active_best_quality == -1) {
+      ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_low_motion_minq);
+      arf_active_best_quality_hl = arfgf_low_motion_minq[q];
+    }
+    active_best_quality =
+        (int)((double)active_best_quality *
+                  rc->arf_active_best_quality_adjustment_factor +
+              (double)arf_active_best_quality_hl *
+                  (1.0 - rc->arf_active_best_quality_adjustment_factor));
 
     // Modify best quality for second level arfs. For mode VPX_Q this
     // becomes the baseline frame q.
@@ -1480,17 +1491,30 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
   // Extension to max or min Q if undershoot or overshoot is outside
   // the permitted range.
   if (frame_is_intra_only(cm) || boost_frame) {
+    const int layer_depth = gf_group->layer_depth[gf_group_index];
     active_best_quality -=
         (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
     active_worst_quality += (cpi->twopass.extend_maxq / 2);
+
+    if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) {
+      assert(layer_depth > 1);
+      active_best_quality =
+          VPXMAX(active_best_quality,
+                 cpi->twopass.last_qindex_of_arf_layer[layer_depth - 1]);
+    }
   } else {
+    const int max_layer_depth = gf_group->max_layer_depth;
+    assert(max_layer_depth > 0);
+
     active_best_quality -=
         (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2;
     active_worst_quality += cpi->twopass.extend_maxq;
 
     // For normal frames do not allow an active minq lower than the q used for
     // the last boosted frame.
-    active_best_quality = VPXMAX(active_best_quality, rc->last_boosted_qindex);
+    active_best_quality =
+        VPXMAX(active_best_quality,
+               cpi->twopass.last_qindex_of_arf_layer[max_layer_depth - 1]);
   }
 
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
@@ -1789,6 +1813,9 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
   RATE_CONTROL *const rc = &cpi->rc;
   SVC *const svc = &cpi->svc;
   const int qindex = cm->base_qindex;
+  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
+  const int gf_group_index = cpi->twopass.gf_group.index;
+  const int layer_depth = gf_group->layer_depth[gf_group_index];
 
   // Update rate control heuristics
   rc->projected_frame_size = (int)(bytes_used << 3);
@@ -1843,6 +1870,15 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
         (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
     rc->last_boosted_qindex = qindex;
   }
+
+  if ((qindex < cpi->twopass.last_qindex_of_arf_layer[layer_depth]) ||
+      (cm->frame_type == KEY_FRAME) ||
+      (!rc->constrained_gf_group &&
+       (cpi->refresh_alt_ref_frame ||
+        (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
+    cpi->twopass.last_qindex_of_arf_layer[layer_depth] = qindex;
+  }
+
   if (frame_is_intra_only(cm)) rc->last_kf_qindex = qindex;
 
   update_buffer_level_postencode(cpi, rc->projected_frame_size);
@@ -2441,12 +2477,23 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
     // Set Maximum gf/arf interval
     rc->max_gf_interval = oxcf->max_gf_interval;
     rc->min_gf_interval = oxcf->min_gf_interval;
+#if CONFIG_RATE_CTRL
+    if (rc->min_gf_interval == 0) {
+      rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
+          oxcf->width, oxcf->height, oxcf->init_framerate);
+    }
+    if (rc->max_gf_interval == 0) {
+      rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
+          oxcf->init_framerate, rc->min_gf_interval);
+    }
+#else
     if (rc->min_gf_interval == 0)
       rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
           oxcf->width, oxcf->height, cpi->framerate);
     if (rc->max_gf_interval == 0)
       rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
           cpi->framerate, rc->min_gf_interval);
+#endif
 
     // Extended max interval for genuinely static scenes like slide shows.
     rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;
diff --git a/libvpx/vp9/encoder/vp9_ratectrl.h b/libvpx/vp9/encoder/vp9_ratectrl.h
index 09d69e4d4..7dbe17dc5 100644
--- a/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -198,7 +198,7 @@ typedef struct {
 
   int damped_adjustment[RATE_FACTOR_LEVELS];
   double arf_active_best_quality_adjustment_factor;
-  int arf_active_best_quality_adjustment_window;
+  int arf_increase_active_best_quality;
 
   int preserve_arf_as_gld;
   int preserve_next_arf_as_gld;
@@ -267,6 +267,8 @@ void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi);
 // Post encode drop for CBR mode.
 int post_encode_drop_cbr(struct VP9_COMP *cpi, size_t *size);
 
+int vp9_test_drop(struct VP9_COMP *cpi);
+
 // Decide if we should drop this frame: For 1-pass CBR.
 // Changes only the decimation count in the rate control structure
 int vp9_rc_drop_frame(struct VP9_COMP *cpi);
diff --git a/libvpx/vp9/encoder/vp9_rd.h b/libvpx/vp9/encoder/vp9_rd.h
index df6ea9094..908989c07 100644
--- a/libvpx/vp9/encoder/vp9_rd.h
+++ b/libvpx/vp9/encoder/vp9_rd.h
@@ -38,8 +38,6 @@ extern "C" {
 #define MV_COST_WEIGHT 108
 #define MV_COST_WEIGHT_SUB 120
 
-#define INVALID_MV 0x80008000
-
 #define MAX_MODES 30
 #define MAX_REFS 6
 
diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c
index d07d91774..fa7472ca6 100644
--- a/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/libvpx/vp9/encoder/vp9_rdopt.c
@@ -2494,19 +2494,19 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
   const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
   MV pred_mv[3];
 
+  int bestsme = INT_MAX;
 #if CONFIG_NON_GREEDY_MV
-  double bestsme;
-  int_mv nb_full_mvs[NB_MVS_NUM];
-  const int nb_full_mv_num = NB_MVS_NUM;
   int gf_group_idx = cpi->twopass.gf_group.index;
   int gf_rf_idx = ref_frame_to_gf_rf_idx(ref);
   BLOCK_SIZE square_bsize = get_square_block_size(bsize);
+  int_mv nb_full_mvs[NB_MVS_NUM] = { 0 };
+  MotionField *motion_field = vp9_motion_field_info_get_motion_field(
+      &cpi->motion_field_info, gf_group_idx, gf_rf_idx, square_bsize);
+  const int nb_full_mv_num =
+      vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
   const int lambda = (pw * ph) / 4;
   assert(pw * ph == lambda << 2);
-  vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col,
-                          gf_rf_idx, square_bsize, nb_full_mvs);
 #else   // CONFIG_NON_GREEDY_MV
-  int bestsme = INT_MAX;
   int sadpb = x->sadperbit16;
 #endif  // CONFIG_NON_GREEDY_MV
 
@@ -2580,9 +2580,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
   mvp_full.row >>= 3;
 
 #if CONFIG_NON_GREEDY_MV
-  bestsme = vp9_full_pixel_diamond_new(cpi, x, &mvp_full, step_param, lambda, 1,
-                                       &cpi->fn_ptr[bsize], nb_full_mvs,
-                                       nb_full_mv_num, &tmp_mv->as_mv);
+  bestsme = vp9_full_pixel_diamond_new(cpi, x, bsize, &mvp_full, step_param,
+                                       lambda, 1, nb_full_mvs, nb_full_mv_num,
+                                       &tmp_mv->as_mv);
 #else   // CONFIG_NON_GREEDY_MV
   bestsme = vp9_full_pixel_search(
       cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb,
@@ -2592,11 +2592,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
   if (cpi->sf.enhanced_full_pixel_motion_search) {
     int i;
     for (i = 0; i < 3; ++i) {
-#if CONFIG_NON_GREEDY_MV
-      double this_me;
-#else   // CONFIG_NON_GREEDY_MV
       int this_me;
-#endif  // CONFIG_NON_GREEDY_MV
       MV this_mv;
       int diff_row;
       int diff_col;
@@ -2622,9 +2618,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
       mvp_full.row >>= 3;
 #if CONFIG_NON_GREEDY_MV
       this_me = vp9_full_pixel_diamond_new(
-          cpi, x, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step),
-          lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num,
-          &this_mv);
+          cpi, x, bsize, &mvp_full,
+          VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), lambda, 1, nb_full_mvs,
+          nb_full_mv_num, &this_mv);
 #else   // CONFIG_NON_GREEDY_MV
       this_me = vp9_full_pixel_search(
           cpi, x, bsize, &mvp_full,
@@ -2678,8 +2674,7 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd,
 // However, once established that vector may be usable through the nearest and
 // near mv modes to reduce distortion in subsequent blocks and also improve
 // visual quality.
-static int discount_newmv_test(const VP9_COMP *cpi, int this_mode,
-                               int_mv this_mv,
+static int discount_newmv_test(VP9_COMP *cpi, int this_mode, int_mv this_mv,
                                int_mv (*mode_mv)[MAX_REF_FRAMES], int ref_frame,
                                int mi_row, int mi_col, BLOCK_SIZE bsize) {
 #if CONFIG_NON_GREEDY_MV
@@ -2689,6 +2684,8 @@ static int discount_newmv_test(const VP9_COMP *cpi, int this_mode,
     const int gf_group_idx = cpi->twopass.gf_group.index;
     const int gf_rf_idx = ref_frame_to_gf_rf_idx(ref_frame);
     const TplDepFrame tpl_frame = cpi->tpl_stats[gf_group_idx];
+    const MotionField *motion_field = vp9_motion_field_info_get_motion_field(
+        &cpi->motion_field_info, gf_group_idx, gf_rf_idx, cpi->tpl_bsize);
     const int tpl_block_mi_h = num_8x8_blocks_high_lookup[cpi->tpl_bsize];
     const int tpl_block_mi_w = num_8x8_blocks_wide_lookup[cpi->tpl_bsize];
     const int tpl_mi_row = mi_row - (mi_row % tpl_block_mi_h);
@@ -2697,8 +2694,8 @@ static int discount_newmv_test(const VP9_COMP *cpi, int this_mode,
         tpl_frame
             .mv_mode_arr[gf_rf_idx][tpl_mi_row * tpl_frame.stride + tpl_mi_col];
     if (mv_mode == NEW_MV_MODE) {
-      int_mv tpl_new_mv = *get_pyramid_mv(&tpl_frame, gf_rf_idx, cpi->tpl_bsize,
-                                          tpl_mi_row, tpl_mi_col);
+      int_mv tpl_new_mv =
+          vp9_motion_field_mi_get_mv(motion_field, tpl_mi_row, tpl_mi_col);
       int row_diff = abs(tpl_new_mv.as_mv.row - this_mv.as_mv.row);
       int col_diff = abs(tpl_new_mv.as_mv.col - this_mv.as_mv.col);
       if (VPXMAX(row_diff, col_diff) <= 8) {
@@ -3455,7 +3452,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
   if (cpi->rc.is_src_frame_alt_ref) {
     if (sf->alt_ref_search_fp) {
       mode_skip_mask[ALTREF_FRAME] = 0;
-      ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
+      ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME) & 0xff;
       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
     }
   }
diff --git a/libvpx/vp9/encoder/vp9_speed_features.c b/libvpx/vp9/encoder/vp9_speed_features.c
index 529dca040..0b24b5cb3 100644
--- a/libvpx/vp9/encoder/vp9_speed_features.c
+++ b/libvpx/vp9/encoder/vp9_speed_features.c
@@ -456,6 +456,7 @@ static void set_rt_speed_feature_framesize_independent(
   sf->variance_part_thresh_mult = 1;
   sf->cb_pred_filter_search = 0;
   sf->force_smooth_interpol = 0;
+  sf->rt_intra_dc_only_low_content = 0;
 
   if (speed >= 1) {
     sf->allow_txfm_domain_distortion = 1;
@@ -535,13 +536,6 @@ static void set_rt_speed_feature_framesize_independent(
     int i;
     if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0)
       sf->use_altref_onepass = 1;
-    sf->last_partitioning_redo_frequency = 4;
-    sf->adaptive_rd_thresh = 5;
-    sf->use_fast_coef_costing = 0;
-    sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
-    sf->adjust_partitioning_from_last_frame =
-        cm->last_frame_type != cm->frame_type ||
-        (0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency);
     sf->mv.subpel_force_stop = QUARTER_PEL;
     for (i = 0; i < TX_SIZES; i++) {
       sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
@@ -550,13 +544,19 @@ static void set_rt_speed_feature_framesize_independent(
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
     sf->frame_parameter_update = 0;
     sf->mv.search_method = FAST_HEX;
-
-    sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
-    sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
-    sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
-    sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST;
+    sf->allow_skip_recode = 0;
     sf->max_intra_bsize = BLOCK_32X32;
-    sf->allow_skip_recode = 1;
+    sf->use_fast_coef_costing = 0;
+    sf->use_quant_fp = !is_keyframe;
+    sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
+    sf->adaptive_rd_thresh = 2;
+    sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED;
+    sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH;
+    sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
+    sf->partition_search_type = VAR_BASED_PARTITION;
   }
 
   if (speed >= 5) {
@@ -740,12 +740,7 @@ static void set_rt_speed_feature_framesize_independent(
       sf->nonrd_use_ml_partition = 0;
 #endif
     if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = HALF_PEL;
-    // Only keep INTRA_DC mode for speed 8.
-    if (!is_keyframe) {
-      int i = 0;
-      for (i = 0; i < BLOCK_SIZES; ++i)
-        sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
-    }
+    sf->rt_intra_dc_only_low_content = 1;
     if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
         content != VP9E_CONTENT_SCREEN) {
       // More aggressive short circuit for speed 8.
@@ -771,6 +766,12 @@ static void set_rt_speed_feature_framesize_independent(
   }
 
   if (speed >= 9) {
+    // Only keep INTRA_DC mode for speed 9.
+    if (!is_keyframe) {
+      int i = 0;
+      for (i = 0; i < BLOCK_SIZES; ++i)
+        sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
+    }
     sf->cb_pred_filter_search = 1;
     sf->mv.enable_adaptive_subpel_force_stop = 1;
     sf->mv.adapt_subpel_force_stop.mv_thresh = 1;
@@ -817,7 +818,7 @@ static void set_rt_speed_feature_framesize_independent(
   }
   // TODO(marpan): There is regression for aq-mode=3 speed <= 4, force it
   // off for now.
-  if (speed <= 4 && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+  if (speed <= 3 && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
     cpi->oxcf.aq_mode = 0;
 }
 
diff --git a/libvpx/vp9/encoder/vp9_speed_features.h b/libvpx/vp9/encoder/vp9_speed_features.h
index eb0628199..ca284ded8 100644
--- a/libvpx/vp9/encoder/vp9_speed_features.h
+++ b/libvpx/vp9/encoder/vp9_speed_features.h
@@ -608,6 +608,10 @@ typedef struct SPEED_FEATURES {
 
   // Force subpel motion filter to always use SMOOTH_FILTER.
   int force_smooth_interpol;
+
+  // For real-time mode: force DC only under intra search when content
+  // does not have high souce SAD.
+  int rt_intra_dc_only_low_content;
 } SPEED_FEATURES;
 
 struct VP9_COMP;
diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.c b/libvpx/vp9/encoder/vp9_svc_layercontext.c
index 8ba113bf3..32ee6e064 100644
--- a/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -57,8 +57,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
   svc->simulcast_mode = 0;
 
   for (i = 0; i < REF_FRAMES; ++i) {
-    svc->fb_idx_spatial_layer_id[i] = -1;
-    svc->fb_idx_temporal_layer_id[i] = -1;
+    svc->fb_idx_spatial_layer_id[i] = 0xff;
+    svc->fb_idx_temporal_layer_id[i] = 0xff;
     svc->fb_idx_base[i] = 0;
   }
   for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
@@ -74,6 +74,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
     svc->fb_idx_upd_tl0[sl] = -1;
     svc->drop_count[sl] = 0;
     svc->spatial_layer_sync[sl] = 0;
+    svc->force_drop_constrained_from_above[sl] = 0;
   }
   svc->max_consec_drop = INT_MAX;
 
@@ -770,6 +771,32 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
   svc->mi_rows[svc->spatial_layer_id] = cpi->common.mi_rows;
   svc->mi_cols[svc->spatial_layer_id] = cpi->common.mi_cols;
 
+  // For constrained_from_above drop mode: before encoding superframe (i.e.,
+  // at SL0 frame) check all spatial layers (starting from top) for possible
+  // drop, and if so, set a flag to force drop of that layer and all its lower
+  // layers.
+  if (svc->spatial_layer_to_encode == svc->first_spatial_layer_to_encode) {
+    int sl;
+    for (sl = 0; sl < svc->number_spatial_layers; sl++)
+      svc->force_drop_constrained_from_above[sl] = 0;
+    if (svc->framedrop_mode == CONSTRAINED_FROM_ABOVE_DROP) {
+      for (sl = svc->number_spatial_layers - 1;
+           sl >= svc->first_spatial_layer_to_encode; sl--) {
+        int layer = sl * svc->number_temporal_layers + svc->temporal_layer_id;
+        LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+        cpi->rc = lc->rc;
+        cpi->oxcf.target_bandwidth = lc->target_bandwidth;
+        if (vp9_test_drop(cpi)) {
+          int sl2;
+          // Set flag to force drop in encoding for this mode.
+          for (sl2 = sl; sl2 >= svc->first_spatial_layer_to_encode; sl2--)
+            svc->force_drop_constrained_from_above[sl2] = 1;
+          break;
+        }
+      }
+    }
+  }
+
   if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
     set_flags_and_fb_idx_for_temporal_mode3(cpi);
   } else if (svc->temporal_layering_mode ==
diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.h b/libvpx/vp9/encoder/vp9_svc_layercontext.h
index 77d438266..f1ba77970 100644
--- a/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -58,7 +58,6 @@ typedef struct {
   int gold_ref_idx;
   int has_alt_frame;
   size_t layer_size;
-  struct vpx_psnr_pkt psnr_pkt;
   // Cyclic refresh parameters (aq-mode=3), that need to be updated per-frame.
   // TODO(jianj/marpan): Is it better to use the full cyclic refresh struct.
   int sb_index;
@@ -138,6 +137,7 @@ typedef struct SVC {
   int drop_spatial_layer[VPX_MAX_LAYERS];
   int framedrop_thresh[VPX_MAX_LAYERS];
   int drop_count[VPX_MAX_LAYERS];
+  int force_drop_constrained_from_above[VPX_MAX_LAYERS];
   int max_consec_drop;
   SVC_LAYER_DROP_MODE framedrop_mode;
 
diff --git a/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
index aa46c5889..4be6a5ea0 100644
--- a/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
+++ b/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
@@ -114,7 +114,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
   // Work out the start point for the search
   const uint8_t *best_address = in_what;
   const uint8_t *new_best_address = best_address;
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
   __m128i v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
 #else
   __m128i v_ba_d = _mm_set1_epi32((intptr_t)best_address);
@@ -138,7 +138,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
   for (i = 0, step = 0; step < tot_steps; step++) {
     for (j = 0; j < cfg->searches_per_step; j += 4, i += 4) {
       __m128i v_sad_d, v_cost_d, v_outside_d, v_inside_d, v_diff_mv_w;
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
       __m128i v_blocka[2];
 #else
       __m128i v_blocka[1];
@@ -175,7 +175,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
 
       // Compute the SIMD pointer offsets.
       {
-#if ARCH_X86_64  //  sizeof(intptr_t) == 8
+#if VPX_ARCH_X86_64  //  sizeof(intptr_t) == 8
         // Load the offsets
         __m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]);
         __m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]);
@@ -186,7 +186,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
         // Compute the candidate addresses
         v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q);
         v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q);
-#else  // ARCH_X86 //  sizeof(intptr_t) == 4
+#else  // VPX_ARCH_X86 //  sizeof(intptr_t) == 4
         __m128i v_bo_d = _mm_loadu_si128((const __m128i *)&ss_os[i]);
         v_bo_d = _mm_and_si128(v_bo_d, v_inside_d);
         v_blocka[0] = _mm_add_epi32(v_ba_d, v_bo_d);
@@ -294,7 +294,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
     best_address = new_best_address;
 
     v_bmv_w = _mm_set1_epi32(bmv.as_int);
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
     v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
 #else
     v_ba_d = _mm_set1_epi32((intptr_t)best_address);
diff --git a/libvpx/vp9/encoder/x86/vp9_error_sse2.asm b/libvpx/vp9/encoder/x86/vp9_error_sse2.asm
index 11d473b2d..7beec130a 100644
--- a/libvpx/vp9/encoder/x86/vp9_error_sse2.asm
+++ b/libvpx/vp9/encoder/x86/vp9_error_sse2.asm
@@ -58,7 +58,7 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
   movhlps   m7, m6
   paddq     m4, m5
   paddq     m6, m7
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
   movq    rax, m4
   movq [sszq], m6
 %else
@@ -105,7 +105,7 @@ cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
   ; accumulate horizontally and store in return value
   movhlps   m5, m4
   paddq     m4, m5
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
   movq    rax, m4
 %else
   pshufd   m5, m4, 0x1
diff --git a/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
index 885220a71..e3d803b8f 100644
--- a/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -25,7 +25,7 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                           const int16_t *iscan) {
   __m128i zero;
   __m128i thr;
-  int16_t nzflag;
+  int nzflag;
   __m128i eob;
   __m128i round, quant, dequant;
 
diff --git a/libvpx/vp9/simple_encode.cc b/libvpx/vp9/simple_encode.cc
new file mode 100644
index 000000000..6a35eb6bc
--- /dev/null
+++ b/libvpx/vp9/simple_encode.cc
@@ -0,0 +1,313 @@
+#include <vector>
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/vp9_iface_common.h"
+#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/simple_encode.h"
+#include "vp9/vp9_cx_iface.h"
+
+namespace vp9 {
+
+// TODO(angiebird): Merge this function with vpx_img_plane_width()
+static int img_plane_width(const vpx_image_t *img, int plane) {
+  if (plane > 0 && img->x_chroma_shift > 0)
+    return (img->d_w + 1) >> img->x_chroma_shift;
+  else
+    return img->d_w;
+}
+
+// TODO(angiebird): Merge this function with vpx_img_plane_height()
+static int img_plane_height(const vpx_image_t *img, int plane) {
+  if (plane > 0 && img->y_chroma_shift > 0)
+    return (img->d_h + 1) >> img->y_chroma_shift;
+  else
+    return img->d_h;
+}
+
+// TODO(angiebird): Merge this function with vpx_img_read()
+static int img_read(vpx_image_t *img, FILE *file) {
+  int plane;
+
+  for (plane = 0; plane < 3; ++plane) {
+    unsigned char *buf = img->planes[plane];
+    const int stride = img->stride[plane];
+    const int w = img_plane_width(img, plane) *
+                  ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
+    const int h = img_plane_height(img, plane);
+    int y;
+
+    for (y = 0; y < h; ++y) {
+      if (fread(buf, 1, w, file) != (size_t)w) return 0;
+      buf += stride;
+    }
+  }
+
+  return 1;
+}
+
+class SimpleEncode::EncodeImpl {
+ public:
+  VP9_COMP *cpi;
+  vpx_img_fmt_t img_fmt;
+  vpx_image_t tmp_img;
+  std::vector<FIRSTPASS_STATS> first_pass_stats;
+};
+
+static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf,
+                              vpx_img_fmt_t img_fmt) {
+  VP9_COMP *cpi;
+  BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool));
+  vp9_initialize_enc();
+  cpi = vp9_create_compressor(oxcf, buffer_pool);
+  vp9_update_compressor_with_img_fmt(cpi, img_fmt);
+  return cpi;
+}
+
+static void free_encoder(VP9_COMP *cpi) {
+  BufferPool *buffer_pool = cpi->common.buffer_pool;
+  vp9_remove_compressor(cpi);
+  // buffer_pool needs to be free after cpi because buffer_pool contains
+  // allocated buffers that will be free in vp9_remove_compressor()
+  vpx_free(buffer_pool);
+}
+
+static INLINE vpx_rational_t make_vpx_rational(int num, int den) {
+  vpx_rational_t v;
+  v.num = num;
+  v.den = den;
+  return v;
+}
+
+static INLINE FrameType
+get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) {
+  // TODO(angiebird): Figure out if we need frame type other than key frame,
+  // alternate reference and inter frame
+  switch (update_type) {
+    case KF_UPDATE: return kKeyFrame; break;
+    case ARF_UPDATE: return kAlternateReference; break;
+    default: return kInterFrame; break;
+  }
+}
+
+static void update_encode_frame_result(
+    EncodeFrameResult *encode_frame_result,
+    const ENCODE_FRAME_RESULT *encode_frame_info) {
+  encode_frame_result->coding_data_bit_size =
+      encode_frame_result->coding_data_byte_size * 8;
+  encode_frame_result->show_idx = encode_frame_info->show_idx;
+  encode_frame_result->frame_type =
+      get_frame_type_from_update_type(encode_frame_info->update_type);
+  encode_frame_result->psnr = encode_frame_info->psnr;
+  encode_frame_result->sse = encode_frame_info->sse;
+  encode_frame_result->quantize_index = encode_frame_info->quantize_index;
+}
+
+SimpleEncode::SimpleEncode(int frame_width, int frame_height,
+                           int frame_rate_num, int frame_rate_den,
+                           int target_bitrate, int num_frames,
+                           const char *infile_path) {
+  impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl());
+  frame_width_ = frame_width;
+  frame_height_ = frame_height;
+  frame_rate_num_ = frame_rate_num;
+  frame_rate_den_ = frame_rate_den;
+  target_bitrate_ = target_bitrate;
+  num_frames_ = num_frames;
+  // TODO(angirbid): Should we keep a file pointer here or keep the file_path?
+  file_ = fopen(infile_path, "r");
+  impl_ptr_->cpi = NULL;
+  impl_ptr_->img_fmt = VPX_IMG_FMT_I420;
+}
+
+void SimpleEncode::ComputeFirstPassStats() {
+  vpx_rational_t frame_rate =
+      make_vpx_rational(frame_rate_num_, frame_rate_den_);
+  const VP9EncoderConfig oxcf =
+      vp9_get_encoder_config(frame_width_, frame_height_, frame_rate,
+                             target_bitrate_, VPX_RC_FIRST_PASS);
+  VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
+  struct lookahead_ctx *lookahead = cpi->lookahead;
+  int i;
+  int use_highbitdepth = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+  use_highbitdepth = cpi->common.use_highbitdepth;
+#endif
+  vpx_image_t img;
+  vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1);
+  rewind(file_);
+  impl_ptr_->first_pass_stats.clear();
+  for (i = 0; i < num_frames_; ++i) {
+    assert(!vp9_lookahead_full(lookahead));
+    if (img_read(&img, file_)) {
+      int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
+      int64_t ts_start =
+          timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx);
+      int64_t ts_end =
+          timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1);
+      YV12_BUFFER_CONFIG sd;
+      image2yuvconfig(&img, &sd);
+      vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
+      {
+        int64_t time_stamp;
+        int64_t time_end;
+        int flush = 1;  // Makes vp9_get_compressed_data process a frame
+        size_t size;
+        unsigned int frame_flags = 0;
+        ENCODE_FRAME_RESULT encode_frame_info;
+        // TODO(angiebird): Call vp9_first_pass directly
+        vp9_get_compressed_data(cpi, &frame_flags, &size, NULL, &time_stamp,
+                                &time_end, flush, &encode_frame_info);
+        // vp9_get_compressed_data only generates first pass stats not
+        // compresses data
+        assert(size == 0);
+      }
+      impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass));
+    }
+  }
+  vp9_end_first_pass(cpi);
+  // TODO(angiebird): Store the total_stats apart form first_pass_stats
+  impl_ptr_->first_pass_stats.push_back(vp9_get_total_stats(&cpi->twopass));
+  free_encoder(cpi);
+  rewind(file_);
+  vpx_img_free(&img);
+}
+
+std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
+  std::vector<std::vector<double>> output_stats;
+  // TODO(angiebird): This function make several assumptions of
+  // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the
+  // last one. 2) The last entry of first_pass_stats is the total_stats.
+  // Change the code structure, so that we don't have to make these assumptions
+
+  // Note the last entry of first_pass_stats is the total_stats, we don't need
+  // it.
+  for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) {
+    double *buf_start =
+        reinterpret_cast<double *>(&impl_ptr_->first_pass_stats[i]);
+    // We use - 1 here because the last member in FIRSTPASS_STATS is not double
+    double *buf_end =
+        buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) -
+        1;
+    std::vector<double> this_stats(buf_start, buf_end);
+    output_stats.push_back(this_stats);
+  }
+  return output_stats;
+}
+
+void SimpleEncode::StartEncode() {
+  assert(impl_ptr_->first_pass_stats.size() > 0);
+  vpx_rational_t frame_rate =
+      make_vpx_rational(frame_rate_num_, frame_rate_den_);
+  VP9EncoderConfig oxcf =
+      vp9_get_encoder_config(frame_width_, frame_height_, frame_rate,
+                             target_bitrate_, VPX_RC_LAST_PASS);
+  vpx_fixed_buf_t stats;
+  stats.buf = impl_ptr_->first_pass_stats.data();
+  stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) *
+             impl_ptr_->first_pass_stats.size();
+
+  vp9_set_first_pass_stats(&oxcf, &stats);
+  assert(impl_ptr_->cpi == NULL);
+  impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
+  vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_,
+                frame_height_, 1);
+  rewind(file_);
+}
+
+void SimpleEncode::EndEncode() {
+  free_encoder(impl_ptr_->cpi);
+  impl_ptr_->cpi = nullptr;
+  vpx_img_free(&impl_ptr_->tmp_img);
+  rewind(file_);
+}
+
+int SimpleEncode::GetKeyFrameGroupSize(int key_frame_index) const {
+  const VP9_COMP *cpi = impl_ptr_->cpi;
+  return vp9_get_frames_to_next_key(&cpi->oxcf, &cpi->frame_info,
+                                    &cpi->twopass.first_pass_info,
+                                    key_frame_index, cpi->rc.min_gf_interval);
+}
+
+void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
+  VP9_COMP *cpi = impl_ptr_->cpi;
+  struct lookahead_ctx *lookahead = cpi->lookahead;
+  int use_highbitdepth = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+  use_highbitdepth = cpi->common.use_highbitdepth;
+#endif
+  // The lookahead's size is set to oxcf->lag_in_frames.
+  // We want to fill lookahead to it's max capacity if possible so that the
+  // encoder can construct alt ref frame in time.
+  // In the other words, we hope vp9_get_compressed_data to encode a frame
+  // every time in the function
+  while (!vp9_lookahead_full(lookahead)) {
+    // TODO(angiebird): Check whether we can move this file read logics to
+    // lookahead
+    if (img_read(&impl_ptr_->tmp_img, file_)) {
+      int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
+      int64_t ts_start =
+          timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx);
+      int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts,
+                                               next_show_idx + 1);
+      YV12_BUFFER_CONFIG sd;
+      image2yuvconfig(&impl_ptr_->tmp_img, &sd);
+      vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
+    } else {
+      break;
+    }
+  }
+  assert(encode_frame_result->coding_data.get() == nullptr);
+  const size_t max_coding_data_byte_size = frame_width_ * frame_height_ * 3;
+  encode_frame_result->coding_data = std::move(
+      std::unique_ptr<uint8_t[]>(new uint8_t[max_coding_data_byte_size]));
+  int64_t time_stamp;
+  int64_t time_end;
+  int flush = 1;  // Make vp9_get_compressed_data encode a frame
+  unsigned int frame_flags = 0;
+  ENCODE_FRAME_RESULT encode_frame_info;
+  vp9_get_compressed_data(cpi, &frame_flags,
+                          &encode_frame_result->coding_data_byte_size,
+                          encode_frame_result->coding_data.get(), &time_stamp,
+                          &time_end, flush, &encode_frame_info);
+  // vp9_get_compressed_data is expected to encode a frame every time, so the
+  // data size should be greater than zero.
+  assert(encode_frame_result->coding_data_byte_size > 0);
+  assert(encode_frame_result->coding_data_byte_size <
+         max_coding_data_byte_size);
+
+  update_encode_frame_result(encode_frame_result, &encode_frame_info);
+}
+
+void SimpleEncode::EncodeFrameWithQuantizeIndex(
+    EncodeFrameResult *encode_frame_result, int quantize_index) {
+  encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command,
+                                             quantize_index);
+  EncodeFrame(encode_frame_result);
+  encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command);
+}
+
+int SimpleEncode::GetCodingFrameNum() const {
+  assert(impl_ptr_->first_pass_stats.size() - 1 > 0);
+  // These are the default settings for now.
+  const int multi_layer_arf = 0;
+  const int allow_alt_ref = 1;
+  vpx_rational_t frame_rate =
+      make_vpx_rational(frame_rate_num_, frame_rate_den_);
+  const VP9EncoderConfig oxcf =
+      vp9_get_encoder_config(frame_width_, frame_height_, frame_rate,
+                             target_bitrate_, VPX_RC_LAST_PASS);
+  FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
+  FIRST_PASS_INFO first_pass_info;
+  fps_init_first_pass_info(&first_pass_info, impl_ptr_->first_pass_stats.data(),
+                           num_frames_);
+  return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info,
+                                  multi_layer_arf, allow_alt_ref);
+}
+
+SimpleEncode::~SimpleEncode() {
+  if (this->file_ != NULL) {
+    fclose(this->file_);
+  }
+}
+
+}  // namespace vp9
diff --git a/libvpx/vp9/simple_encode.h b/libvpx/vp9/simple_encode.h
new file mode 100644
index 000000000..471b4e7a8
--- /dev/null
+++ b/libvpx/vp9/simple_encode.h
@@ -0,0 +1,104 @@
+/*
+ *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP9_SIMPLE_ENCODE_H_
+#define VPX_VP9_SIMPLE_ENCODE_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+#include <vector>
+
+namespace vp9 {
+
+enum FrameType {
+  kKeyFrame = 0,
+  kInterFrame,
+  kAlternateReference,
+};
+
+struct EncodeFrameResult {
+  int show_idx;
+  FrameType frame_type;
+  size_t coding_data_bit_size;
+  size_t coding_data_byte_size;
+  // The EncodeFrame will allocate a buffer, write the coding data into the
+  // buffer and give the ownership of the buffer to coding_data.
+  std::unique_ptr<unsigned char[]> coding_data;
+  double psnr;
+  uint64_t sse;
+  int quantize_index;
+};
+
+class SimpleEncode {
+ public:
+  SimpleEncode(int frame_width, int frame_height, int frame_rate_num,
+               int frame_rate_den, int target_bitrate, int num_frames,
+               const char *infile_path);
+  ~SimpleEncode();
+  SimpleEncode(SimpleEncode &) = delete;
+  SimpleEncode &operator=(const SimpleEncode &) = delete;
+
+  // Makes encoder compute the first pass stats and store it internally for
+  // future encode.
+  void ComputeFirstPassStats();
+
+  // Outputs the first pass stats represented by a 2-D vector.
+  // One can use the frame index at first dimension to retrieve the stats for
+  // each video frame. The stats of each video frame is a vector of 25 double
+  // values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h
+  std::vector<std::vector<double>> ObserveFirstPassStats();
+
+  // Initializes the encoder for actual encoding.
+  // This function should be called after ComputeFirstPassStats().
+  void StartEncode();
+
+  // Frees the encoder.
+  // This function should be called after StartEncode() or EncodeFrame().
+  void EndEncode();
+
+  // Given a key_frame_index, computes this key frame group's size.
+  // The key frame group size includes one key frame plus the number of
+  // following inter frames. Note that the key frame group size only counts the
+  // show frames. The number of no show frames like alternate refereces are not
+  // counted.
+  int GetKeyFrameGroupSize(int key_frame_index) const;
+
+  // Encodes a frame
+  // This function should be called after StartEncode() and before EndEncode().
+  void EncodeFrame(EncodeFrameResult *encode_frame_result);
+
+  // Encodes a frame with a specific quantize index.
+  // This function should be called after StartEncode() and before EndEncode().
+  void EncodeFrameWithQuantizeIndex(EncodeFrameResult *encode_frame_result,
+                                    int quantize_index);
+
+  // Gets the number of coding frames for the video. The coding frames include
+  // show frame and no show frame.
+  // This function should be called after ComputeFirstPassStats().
+  int GetCodingFrameNum() const;
+
+ private:
+  class EncodeImpl;
+
+  int frame_width_;
+  int frame_height_;
+  int frame_rate_num_;
+  int frame_rate_den_;
+  int target_bitrate_;
+  int num_frames_;
+  std::FILE *file_;
+  std::unique_ptr<EncodeImpl> impl_ptr_;
+};
+
+}  // namespace vp9
+
+#endif  // VPX_VP9_SIMPLE_ENCODE_H_
diff --git a/libvpx/vp9/vp9_common.mk b/libvpx/vp9/vp9_common.mk
index c9a55669e..5ef2f891a 100644
--- a/libvpx/vp9/vp9_common.mk
+++ b/libvpx/vp9/vp9_common.mk
@@ -10,6 +10,7 @@
 
 VP9_COMMON_SRCS-yes += vp9_common.mk
 VP9_COMMON_SRCS-yes += vp9_iface_common.h
+VP9_COMMON_SRCS-yes += vp9_iface_common.c
 VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
 VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
 VP9_COMMON_SRCS-yes += common/vp9_blockd.c
diff --git a/libvpx/vp9/vp9_cx_iface.c b/libvpx/vp9/vp9_cx_iface.c
index 45e03f2de..f415e50f7 100644
--- a/libvpx/vp9/vp9_cx_iface.c
+++ b/libvpx/vp9/vp9_cx_iface.c
@@ -13,6 +13,7 @@
 
 #include "./vpx_config.h"
 #include "vpx/vpx_encoder.h"
+#include "vpx_dsp/psnr.h"
 #include "vpx_ports/vpx_once.h"
 #include "vpx_ports/system_state.h"
 #include "vpx_util/vpx_timestamp.h"
@@ -20,10 +21,14 @@
 #include "./vpx_version.h"
 #include "vp9/encoder/vp9_encoder.h"
 #include "vpx/vp8cx.h"
+#include "vp9/common/vp9_alloccommon.h"
+#include "vp9/vp9_cx_iface.h"
 #include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/encoder/vp9_lookahead.h"
+#include "vp9/vp9_cx_iface.h"
 #include "vp9/vp9_iface_common.h"
 
-struct vp9_extracfg {
+typedef struct vp9_extracfg {
   int cpu_used;  // available cpu percentage in 1/16
   unsigned int enable_auto_alt_ref;
   unsigned int noise_sensitivity;
@@ -55,7 +60,7 @@ struct vp9_extracfg {
   int render_height;
   unsigned int row_mt;
   unsigned int motion_vector_unit_test;
-};
+} vp9_extracfg;
 
 static struct vp9_extracfg default_extra_cfg = {
   0,                     // cpu_used
@@ -466,6 +471,15 @@ static void config_target_level(VP9EncoderConfig *oxcf) {
   }
 }
 
+static vpx_rational64_t get_g_timebase_in_ts(vpx_rational_t g_timebase) {
+  vpx_rational64_t g_timebase_in_ts;
+  g_timebase_in_ts.den = g_timebase.den;
+  g_timebase_in_ts.num = g_timebase.num;
+  g_timebase_in_ts.num *= TICKS_PER_SEC;
+  reduce_ratio(&g_timebase_in_ts);
+  return g_timebase_in_ts;
+}
+
 static vpx_codec_err_t set_encoder_config(
     VP9EncoderConfig *oxcf, const vpx_codec_enc_cfg_t *cfg,
     const struct vp9_extracfg *extra_cfg) {
@@ -477,9 +491,13 @@ static vpx_codec_err_t set_encoder_config(
   oxcf->height = cfg->g_h;
   oxcf->bit_depth = cfg->g_bit_depth;
   oxcf->input_bit_depth = cfg->g_input_bit_depth;
+  // TODO(angiebird): Figure out if we can just use g_timebase to indicate the
+  // inverse of framerate
   // guess a frame rate if out of whack, use 30
   oxcf->init_framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num;
   if (oxcf->init_framerate > 180) oxcf->init_framerate = 30;
+  oxcf->g_timebase = cfg->g_timebase;
+  oxcf->g_timebase_in_ts = get_g_timebase_in_ts(oxcf->g_timebase);
 
   oxcf->mode = GOOD;
 
@@ -539,10 +557,16 @@ static vpx_codec_err_t set_encoder_config(
   oxcf->speed = abs(extra_cfg->cpu_used);
   oxcf->encode_breakout = extra_cfg->static_thresh;
   oxcf->enable_auto_arf = extra_cfg->enable_auto_alt_ref;
-  oxcf->noise_sensitivity = extra_cfg->noise_sensitivity;
+  if (oxcf->bit_depth == VPX_BITS_8) {
+    oxcf->noise_sensitivity = extra_cfg->noise_sensitivity;
+  } else {
+    // Disable denoiser for high bitdepth since vp9_denoiser_filter only works
+    // for 8 bits.
+    oxcf->noise_sensitivity = 0;
+  }
   oxcf->sharpness = extra_cfg->sharpness;
 
-  oxcf->two_pass_stats_in = cfg->rc_twopass_stats_in;
+  vp9_set_first_pass_stats(oxcf, &cfg->rc_twopass_stats_in);
 
 #if CONFIG_FP_MB_STATS
   oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in;
@@ -611,40 +635,7 @@ static vpx_codec_err_t set_encoder_config(
   }
 
   if (get_level_index(oxcf->target_level) >= 0) config_target_level(oxcf);
-  /*
-  printf("Current VP9 Settings: \n");
-  printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
-  printf("target_level: %d\n", oxcf->target_level);
-  printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity);
-  printf("sharpness: %d\n",    oxcf->sharpness);
-  printf("cpu_used: %d\n",  oxcf->cpu_used);
-  printf("Mode: %d\n",     oxcf->mode);
-  printf("auto_key: %d\n",  oxcf->auto_key);
-  printf("key_freq: %d\n", oxcf->key_freq);
-  printf("end_usage: %d\n", oxcf->end_usage);
-  printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct);
-  printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct);
-  printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level);
-  printf("optimal_buffer_level: %d\n",  oxcf->optimal_buffer_level);
-  printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size);
-  printf("fixed_q: %d\n",  oxcf->fixed_q);
-  printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q);
-  printf("best_allowed_q: %d\n", oxcf->best_allowed_q);
-  printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling);
-  printf("scaled_frame_width: %d\n", oxcf->scaled_frame_width);
-  printf("scaled_frame_height: %d\n", oxcf->scaled_frame_height);
-  printf("two_pass_vbrbias: %d\n",  oxcf->two_pass_vbrbias);
-  printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
-  printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
-  printf("vbr_corpus_complexity: %d\n",  oxcf->vbr_corpus_complexity);
-  printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
-  printf("enable_auto_arf: %d\n", oxcf->enable_auto_arf);
-  printf("Version: %d\n", oxcf->Version);
-  printf("encode_breakout: %d\n", oxcf->encode_breakout);
-  printf("error resilient: %d\n", oxcf->error_resilient_mode);
-  printf("frame parallel detokenization: %d\n",
-         oxcf->frame_parallel_decoding_mode);
-  */
+  // vp9_dump_encoder_config(oxcf);
   return VPX_CODEC_OK;
 }
 
@@ -935,10 +926,9 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
 
     if (res == VPX_CODEC_OK) {
       priv->pts_offset_initialized = 0;
-      priv->timestamp_ratio.den = priv->cfg.g_timebase.den;
-      priv->timestamp_ratio.num = (int64_t)priv->cfg.g_timebase.num;
-      priv->timestamp_ratio.num *= TICKS_PER_SEC;
-      reduce_ratio(&priv->timestamp_ratio);
+      // TODO(angiebird): Replace priv->timestamp_ratio by
+      // oxcf->g_timebase_in_ts
+      priv->timestamp_ratio = get_g_timebase_in_ts(priv->cfg.g_timebase);
 
       set_encoder_config(&priv->oxcf, &priv->cfg, &priv->extra_cfg);
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -946,10 +936,7 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
           (ctx->init_flags & VPX_CODEC_USE_HIGHBITDEPTH) ? 1 : 0;
 #endif
       priv->cpi = vp9_create_compressor(&priv->oxcf, priv->buffer_pool);
-      if (priv->cpi == NULL)
-        res = VPX_CODEC_MEM_ERROR;
-      else
-        priv->cpi->output_pkt_list = &priv->pkt_list.head;
+      if (priv->cpi == NULL) res = VPX_CODEC_MEM_ERROR;
     }
   }
 
@@ -1067,18 +1054,6 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) {
   return index_sz;
 }
 
-static int64_t timebase_units_to_ticks(const vpx_rational64_t *timestamp_ratio,
-                                       int64_t n) {
-  return n * timestamp_ratio->num / timestamp_ratio->den;
-}
-
-static int64_t ticks_to_timebase_units(const vpx_rational64_t *timestamp_ratio,
-                                       int64_t n) {
-  int64_t round = timestamp_ratio->num / 2;
-  if (round > 0) --round;
-  return (n * timestamp_ratio->den + round) / timestamp_ratio->num;
-}
-
 static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
                                                    unsigned int lib_flags) {
   vpx_codec_frame_flags_t flags = lib_flags << 16;
@@ -1096,6 +1071,27 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
   return flags;
 }
 
+static INLINE vpx_codec_cx_pkt_t get_psnr_pkt(const PSNR_STATS *psnr) {
+  vpx_codec_cx_pkt_t pkt;
+  pkt.kind = VPX_CODEC_PSNR_PKT;
+  pkt.data.psnr = *psnr;
+  return pkt;
+}
+
+#if !CONFIG_REALTIME_ONLY
+static INLINE vpx_codec_cx_pkt_t
+get_first_pass_stats_pkt(FIRSTPASS_STATS *stats) {
+  // WARNNING: This function assumes that stats will
+  // exist and not be changed until the packet is processed
+  // TODO(angiebird): Refactor the code to avoid using the assumption.
+  vpx_codec_cx_pkt_t pkt;
+  pkt.kind = VPX_CODEC_STATS_PKT;
+  pkt.data.twopass_stats.buf = stats;
+  pkt.data.twopass_stats.sz = sizeof(*stats);
+  return pkt;
+}
+#endif
+
 const size_t kMinCompressedSize = 8192;
 static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
                                       const vpx_image_t *img,
@@ -1109,19 +1105,11 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
   VP9_COMP *const cpi = ctx->cpi;
   const vpx_rational64_t *const timestamp_ratio = &ctx->timestamp_ratio;
   size_t data_sz;
+  vpx_codec_cx_pkt_t pkt;
+  memset(&pkt, 0, sizeof(pkt));
 
   if (cpi == NULL) return VPX_CODEC_INVALID_PARAM;
 
-  if (cpi->oxcf.pass == 2 && cpi->level_constraint.level_index >= 0 &&
-      !cpi->level_constraint.rc_config_updated) {
-    const VP9EncoderConfig *const oxcf = &cpi->oxcf;
-    TWO_PASS *const twopass = &cpi->twopass;
-    FIRSTPASS_STATS *stats = &twopass->total_stats;
-    twopass->bits_left =
-        (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
-    cpi->level_constraint.rc_config_updated = 1;
-  }
-
   if (img != NULL) {
     res = validate_img(ctx, img);
     if (res == VPX_CODEC_OK) {
@@ -1223,92 +1211,135 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
       }
     }
 
-    while (cx_data_sz >= ctx->cx_data_sz / 2 &&
-           -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
-                                         &dst_time_stamp, &dst_end_time_stamp,
-                                         !img)) {
-      if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) {
-        vpx_codec_cx_pkt_t pkt;
-
-        // Pack invisible frames with the next visible frame
-        if (!cpi->common.show_frame ||
-            (cpi->use_svc &&
-             cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)) {
-          if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
-          ctx->pending_cx_data_sz += size;
-          if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
-          ctx->pending_frame_magnitude |= size;
-          cx_data += size;
-          cx_data_sz -= size;
+    if (cpi->oxcf.pass == 1 && !cpi->use_svc) {
+#if !CONFIG_REALTIME_ONLY
+      // compute first pass stats
+      if (img) {
+        int ret;
+        ENCODE_FRAME_RESULT encode_frame_result;
+        vpx_codec_cx_pkt_t fps_pkt;
+        // TODO(angiebird): Call vp9_first_pass directly
+        ret = vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
+                                      &dst_time_stamp, &dst_end_time_stamp,
+                                      !img, &encode_frame_result);
+        assert(size == 0);  // There is no compressed data in the first pass
+        (void)ret;
+        assert(ret == 0);
+        fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.this_frame_stats);
+        vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt);
+      } else {
+        if (!cpi->twopass.first_pass_done) {
+          vpx_codec_cx_pkt_t fps_pkt;
+          vp9_end_first_pass(cpi);
+          fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.total_stats);
+          vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt);
+        }
+      }
+#else   // !CONFIG_REALTIME_ONLY
+      assert(0);
+#endif  // !CONFIG_REALTIME_ONLY
+    } else {
+      ENCODE_FRAME_RESULT encode_frame_result;
+      while (cx_data_sz >= ctx->cx_data_sz / 2 &&
+             -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
+                                           &dst_time_stamp, &dst_end_time_stamp,
+                                           !img, &encode_frame_result)) {
+        // Pack psnr pkt
+        if (size > 0 && !cpi->use_svc) {
+          // TODO(angiebird): Figure out while we don't need psnr pkt when
+          // use_svc is on
+          PSNR_STATS psnr;
+          if (vp9_get_psnr(cpi, &psnr)) {
+            vpx_codec_cx_pkt_t psnr_pkt = get_psnr_pkt(&psnr);
+            vpx_codec_pkt_list_add(&ctx->pkt_list.head, &psnr_pkt);
+          }
+        }
+
+        if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) {
+          // Pack invisible frames with the next visible frame
+          if (!cpi->common.show_frame ||
+              (cpi->use_svc && cpi->svc.spatial_layer_id <
+                                   cpi->svc.number_spatial_layers - 1)) {
+            if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
+            ctx->pending_cx_data_sz += size;
+            if (size)
+              ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+            ctx->pending_frame_magnitude |= size;
+            cx_data += size;
+            cx_data_sz -= size;
+            pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
+            pkt.data.frame.height[cpi->svc.spatial_layer_id] =
+                cpi->common.height;
+            pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
+                1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
+
+            if (ctx->output_cx_pkt_cb.output_cx_pkt) {
+              pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+              pkt.data.frame.pts =
+                  ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
+                  ctx->pts_offset;
+              pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
+                  timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
+              pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+              pkt.data.frame.buf = ctx->pending_cx_data;
+              pkt.data.frame.sz = size;
+              ctx->pending_cx_data = NULL;
+              ctx->pending_cx_data_sz = 0;
+              ctx->pending_frame_count = 0;
+              ctx->pending_frame_magnitude = 0;
+              ctx->output_cx_pkt_cb.output_cx_pkt(
+                  &pkt, ctx->output_cx_pkt_cb.user_priv);
+            }
+            continue;
+          }
+
+          // Add the frame packet to the list of returned packets.
+          pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+          pkt.data.frame.pts =
+              ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
+              ctx->pts_offset;
+          pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
+              timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
+          pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
           pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
           pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
           pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
               1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
 
-          if (ctx->output_cx_pkt_cb.output_cx_pkt) {
-            pkt.kind = VPX_CODEC_CX_FRAME_PKT;
-            pkt.data.frame.pts =
-                ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
-                ctx->pts_offset;
-            pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
-                timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
-            pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+          if (ctx->pending_cx_data) {
+            if (size)
+              ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+            ctx->pending_frame_magnitude |= size;
+            ctx->pending_cx_data_sz += size;
+            // write the superframe only for the case when
+            if (!ctx->output_cx_pkt_cb.output_cx_pkt)
+              size += write_superframe_index(ctx);
             pkt.data.frame.buf = ctx->pending_cx_data;
-            pkt.data.frame.sz = size;
+            pkt.data.frame.sz = ctx->pending_cx_data_sz;
             ctx->pending_cx_data = NULL;
             ctx->pending_cx_data_sz = 0;
             ctx->pending_frame_count = 0;
             ctx->pending_frame_magnitude = 0;
+          } else {
+            pkt.data.frame.buf = cx_data;
+            pkt.data.frame.sz = size;
+          }
+          pkt.data.frame.partition_id = -1;
+
+          if (ctx->output_cx_pkt_cb.output_cx_pkt)
             ctx->output_cx_pkt_cb.output_cx_pkt(
                 &pkt, ctx->output_cx_pkt_cb.user_priv);
-          }
-          continue;
-        }
+          else
+            vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
 
-        // Add the frame packet to the list of returned packets.
-        pkt.kind = VPX_CODEC_CX_FRAME_PKT;
-        pkt.data.frame.pts =
-            ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
-            ctx->pts_offset;
-        pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
-            timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
-        pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
-        pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
-        pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
-        pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
-            1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
-
-        if (ctx->pending_cx_data) {
-          if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
-          ctx->pending_frame_magnitude |= size;
-          ctx->pending_cx_data_sz += size;
-          // write the superframe only for the case when
-          if (!ctx->output_cx_pkt_cb.output_cx_pkt)
-            size += write_superframe_index(ctx);
-          pkt.data.frame.buf = ctx->pending_cx_data;
-          pkt.data.frame.sz = ctx->pending_cx_data_sz;
-          ctx->pending_cx_data = NULL;
-          ctx->pending_cx_data_sz = 0;
-          ctx->pending_frame_count = 0;
-          ctx->pending_frame_magnitude = 0;
-        } else {
-          pkt.data.frame.buf = cx_data;
-          pkt.data.frame.sz = size;
-        }
-        pkt.data.frame.partition_id = -1;
-
-        if (ctx->output_cx_pkt_cb.output_cx_pkt)
-          ctx->output_cx_pkt_cb.output_cx_pkt(&pkt,
-                                              ctx->output_cx_pkt_cb.user_priv);
-        else
-          vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
-
-        cx_data += size;
-        cx_data_sz -= size;
-        if (is_one_pass_cbr_svc(cpi) &&
-            (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
-          // Encoded all spatial layers; exit loop.
-          break;
+          cx_data += size;
+          cx_data_sz -= size;
+          if (is_one_pass_cbr_svc(cpi) &&
+              (cpi->svc.spatial_layer_id ==
+               cpi->svc.number_spatial_layers - 1)) {
+            // Encoded all spatial layers; exit loop.
+            break;
+          }
         }
       }
     }
@@ -1765,7 +1796,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
         VPX_VBR,      // rc_end_usage
         { NULL, 0 },  // rc_twopass_stats_in
         { NULL, 0 },  // rc_firstpass_mb_stats_in
-        256,          // rc_target_bandwidth
+        256,          // rc_target_bitrate
         0,            // rc_min_quantizer
         63,           // rc_max_quantizer
         25,           // rc_undershoot_pct
@@ -1831,3 +1862,222 @@ CODEC_INTERFACE(vpx_codec_vp9_cx) = {
       NULL                    // vpx_codec_enc_mr_get_mem_loc_fn_t
   }
 };
+
+static vpx_codec_enc_cfg_t get_enc_cfg(int frame_width, int frame_height,
+                                       vpx_rational_t frame_rate,
+                                       int target_bitrate,
+                                       vpx_enc_pass enc_pass) {
+  vpx_codec_enc_cfg_t enc_cfg = encoder_usage_cfg_map[0].cfg;
+  enc_cfg.g_w = frame_width;
+  enc_cfg.g_h = frame_height;
+  enc_cfg.rc_target_bitrate = target_bitrate;
+  enc_cfg.g_pass = enc_pass;
+  // g_timebase is the inverse of frame_rate
+  enc_cfg.g_timebase.num = frame_rate.den;
+  enc_cfg.g_timebase.den = frame_rate.num;
+  return enc_cfg;
+}
+
+static vp9_extracfg get_extra_cfg() {
+  vp9_extracfg extra_cfg = default_extra_cfg;
+  return extra_cfg;
+}
+
+VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
+                                        vpx_rational_t frame_rate,
+                                        int target_bitrate,
+                                        vpx_enc_pass enc_pass) {
+  /* This function will generate the same VP9EncoderConfig used by the
+   * vpxenc command given below.
+   * The configs in the vpxenc command corresponds to parameters of
+   * vp9_get_encoder_config() as follows.
+   *
+   * WIDTH:   frame_width
+   * HEIGHT:  frame_height
+   * FPS:     frame_rate
+   * BITRATE: target_bitrate
+   *
+   * INPUT, OUTPUT, LIMIT will not affect VP9EncoderConfig
+   *
+   * vpxenc command:
+   * INPUT=bus_cif.y4m
+   * OUTPUT=output.webm
+   * WIDTH=352
+   * HEIGHT=288
+   * BITRATE=600
+   * FPS=30/1
+   * LIMIT=150
+   * ./vpxenc --limit=$LIMIT --width=$WIDTH --height=$HEIGHT --fps=$FPS
+   * --lag-in-frames=25 \
+   *  --codec=vp9 --good --cpu-used=0 --threads=0 --profile=0 \
+   *  --min-q=0 --max-q=63 --auto-alt-ref=1 --passes=2 --kf-max-dist=150 \
+   *  --kf-min-dist=0 --drop-frame=0 --static-thresh=0 --bias-pct=50 \
+   *  --minsection-pct=0 --maxsection-pct=150 --arnr-maxframes=7 --psnr \
+   *  --arnr-strength=5 --sharpness=0 --undershoot-pct=100 --overshoot-pct=100 \
+   *  --frame-parallel=0 --tile-columns=0 --cpu-used=0 --end-usage=vbr \
+   *  --target-bitrate=$BITRATE -o $OUTPUT $INPUT
+   */
+
+  VP9EncoderConfig oxcf;
+  vp9_extracfg extra_cfg = get_extra_cfg();
+  vpx_codec_enc_cfg_t enc_cfg = get_enc_cfg(
+      frame_width, frame_height, frame_rate, target_bitrate, enc_pass);
+  set_encoder_config(&oxcf, &enc_cfg, &extra_cfg);
+
+  // These settings are made to match the settings of the vpxenc command.
+  oxcf.key_freq = 150;
+  oxcf.under_shoot_pct = 100;
+  oxcf.over_shoot_pct = 100;
+  oxcf.max_threads = 0;
+  oxcf.tile_columns = 0;
+  oxcf.frame_parallel_decoding_mode = 0;
+  oxcf.two_pass_vbrmax_section = 150;
+  return oxcf;
+}
+
+#define DUMP_STRUCT_VALUE(struct, value) \
+  printf(#value " %" PRId64 "\n", (int64_t)(struct)->value)
+
+void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf) {
+  DUMP_STRUCT_VALUE(oxcf, profile);
+  DUMP_STRUCT_VALUE(oxcf, bit_depth);
+  DUMP_STRUCT_VALUE(oxcf, width);
+  DUMP_STRUCT_VALUE(oxcf, height);
+  DUMP_STRUCT_VALUE(oxcf, input_bit_depth);
+  DUMP_STRUCT_VALUE(oxcf, init_framerate);
+  // TODO(angiebird): dump g_timebase
+  // TODO(angiebird): dump g_timebase_in_ts
+
+  DUMP_STRUCT_VALUE(oxcf, target_bandwidth);
+
+  DUMP_STRUCT_VALUE(oxcf, noise_sensitivity);
+  DUMP_STRUCT_VALUE(oxcf, sharpness);
+  DUMP_STRUCT_VALUE(oxcf, speed);
+  DUMP_STRUCT_VALUE(oxcf, rc_max_intra_bitrate_pct);
+  DUMP_STRUCT_VALUE(oxcf, rc_max_inter_bitrate_pct);
+  DUMP_STRUCT_VALUE(oxcf, gf_cbr_boost_pct);
+
+  DUMP_STRUCT_VALUE(oxcf, mode);
+  DUMP_STRUCT_VALUE(oxcf, pass);
+
+  // Key Framing Operations
+  DUMP_STRUCT_VALUE(oxcf, auto_key);
+  DUMP_STRUCT_VALUE(oxcf, key_freq);
+
+  DUMP_STRUCT_VALUE(oxcf, lag_in_frames);
+
+  // ----------------------------------------------------------------
+  // DATARATE CONTROL OPTIONS
+
+  // vbr, cbr, constrained quality or constant quality
+  DUMP_STRUCT_VALUE(oxcf, rc_mode);
+
+  // buffer targeting aggressiveness
+  DUMP_STRUCT_VALUE(oxcf, under_shoot_pct);
+  DUMP_STRUCT_VALUE(oxcf, over_shoot_pct);
+
+  // buffering parameters
+  // TODO(angiebird): dump tarting_buffer_level_ms
+  // TODO(angiebird): dump ptimal_buffer_level_ms
+  // TODO(angiebird): dump maximum_buffer_size_ms
+
+  // Frame drop threshold.
+  DUMP_STRUCT_VALUE(oxcf, drop_frames_water_mark);
+
+  // controlling quality
+  DUMP_STRUCT_VALUE(oxcf, fixed_q);
+  DUMP_STRUCT_VALUE(oxcf, worst_allowed_q);
+  DUMP_STRUCT_VALUE(oxcf, best_allowed_q);
+  DUMP_STRUCT_VALUE(oxcf, cq_level);
+  DUMP_STRUCT_VALUE(oxcf, aq_mode);
+
+  // Special handling of Adaptive Quantization for AltRef frames
+  DUMP_STRUCT_VALUE(oxcf, alt_ref_aq);
+
+  // Internal frame size scaling.
+  DUMP_STRUCT_VALUE(oxcf, resize_mode);
+  DUMP_STRUCT_VALUE(oxcf, scaled_frame_width);
+  DUMP_STRUCT_VALUE(oxcf, scaled_frame_height);
+
+  // Enable feature to reduce the frame quantization every x frames.
+  DUMP_STRUCT_VALUE(oxcf, frame_periodic_boost);
+
+  // two pass datarate control
+  DUMP_STRUCT_VALUE(oxcf, two_pass_vbrbias);
+  DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmin_section);
+  DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmax_section);
+  DUMP_STRUCT_VALUE(oxcf, vbr_corpus_complexity);
+  // END DATARATE CONTROL OPTIONS
+  // ----------------------------------------------------------------
+
+  // Spatial and temporal scalability.
+  DUMP_STRUCT_VALUE(oxcf, ss_number_layers);
+  DUMP_STRUCT_VALUE(oxcf, ts_number_layers);
+
+  // Bitrate allocation for spatial layers.
+  // TODO(angiebird): dump layer_target_bitrate[VPX_MAX_LAYERS]
+  // TODO(angiebird): dump ss_target_bitrate[VPX_SS_MAX_LAYERS]
+  // TODO(angiebird): dump ss_enable_auto_arf[VPX_SS_MAX_LAYERS]
+  // TODO(angiebird): dump ts_rate_decimator[VPX_TS_MAX_LAYERS]
+
+  DUMP_STRUCT_VALUE(oxcf, enable_auto_arf);
+  DUMP_STRUCT_VALUE(oxcf, encode_breakout);
+  DUMP_STRUCT_VALUE(oxcf, error_resilient_mode);
+  DUMP_STRUCT_VALUE(oxcf, frame_parallel_decoding_mode);
+
+  DUMP_STRUCT_VALUE(oxcf, arnr_max_frames);
+  DUMP_STRUCT_VALUE(oxcf, arnr_strength);
+
+  DUMP_STRUCT_VALUE(oxcf, min_gf_interval);
+  DUMP_STRUCT_VALUE(oxcf, max_gf_interval);
+
+  DUMP_STRUCT_VALUE(oxcf, tile_columns);
+  DUMP_STRUCT_VALUE(oxcf, tile_rows);
+
+  DUMP_STRUCT_VALUE(oxcf, enable_tpl_model);
+
+  DUMP_STRUCT_VALUE(oxcf, max_threads);
+
+  DUMP_STRUCT_VALUE(oxcf, target_level);
+
+  // TODO(angiebird): dump two_pass_stats_in
+
+#if CONFIG_FP_MB_STATS
+  // TODO(angiebird): dump firstpass_mb_stats_in
+#endif
+
+  DUMP_STRUCT_VALUE(oxcf, tuning);
+  DUMP_STRUCT_VALUE(oxcf, content);
+#if CONFIG_VP9_HIGHBITDEPTH
+  DUMP_STRUCT_VALUE(oxcf, use_highbitdepth);
+#endif
+  DUMP_STRUCT_VALUE(oxcf, color_space);
+  DUMP_STRUCT_VALUE(oxcf, color_range);
+  DUMP_STRUCT_VALUE(oxcf, render_width);
+  DUMP_STRUCT_VALUE(oxcf, render_height);
+  DUMP_STRUCT_VALUE(oxcf, temporal_layering_mode);
+
+  DUMP_STRUCT_VALUE(oxcf, row_mt);
+  DUMP_STRUCT_VALUE(oxcf, motion_vector_unit_test);
+}
+
+FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf) {
+  FRAME_INFO frame_info;
+  int dummy;
+  frame_info.frame_width = oxcf->width;
+  frame_info.frame_height = oxcf->height;
+  frame_info.render_frame_width = oxcf->width;
+  frame_info.render_frame_height = oxcf->height;
+  frame_info.bit_depth = oxcf->bit_depth;
+  vp9_set_mi_size(&frame_info.mi_rows, &frame_info.mi_cols, &dummy,
+                  frame_info.frame_width, frame_info.frame_height);
+  vp9_set_mb_size(&frame_info.mb_rows, &frame_info.mb_cols, &frame_info.num_mbs,
+                  frame_info.mi_rows, frame_info.mi_cols);
+  // TODO(angiebird): Figure out how to get subsampling_x/y here
+  return frame_info;
+}
+
+void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf,
+                              const vpx_fixed_buf_t *stats) {
+  oxcf->two_pass_stats_in = *stats;
+}
diff --git a/libvpx/vp9/vp9_cx_iface.h b/libvpx/vp9/vp9_cx_iface.h
new file mode 100644
index 000000000..08569fcc9
--- /dev/null
+++ b/libvpx/vp9/vp9_cx_iface.h
@@ -0,0 +1,48 @@
+/*
+ *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP9_VP9_CX_IFACE_H_
+#define VPX_VP9_VP9_CX_IFACE_H_
+#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/common/vp9_onyxc_int.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
+                                        vpx_rational_t frame_rate,
+                                        int target_bitrate,
+                                        vpx_enc_pass enc_pass);
+
+void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf);
+
+FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf);
+
+static INLINE int64_t
+timebase_units_to_ticks(const vpx_rational64_t *timestamp_ratio, int64_t n) {
+  return n * timestamp_ratio->num / timestamp_ratio->den;
+}
+
+static INLINE int64_t
+ticks_to_timebase_units(const vpx_rational64_t *timestamp_ratio, int64_t n) {
+  int64_t round = timestamp_ratio->num / 2;
+  if (round > 0) --round;
+  return (n * timestamp_ratio->den + round) / timestamp_ratio->num;
+}
+
+void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf,
+                              const vpx_fixed_buf_t *stats);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VPX_VP9_VP9_CX_IFACE_H_
diff --git a/libvpx/vp9/vp9_iface_common.c b/libvpx/vp9/vp9_iface_common.c
new file mode 100644
index 000000000..74d08a587
--- /dev/null
+++ b/libvpx/vp9/vp9_iface_common.c
@@ -0,0 +1,131 @@
+/*
+ *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed  by a BSD-style license that can be
+ *  found in the LICENSE file in the root of the source tree. An additional
+ *  intellectual property  rights grant can  be found in the  file PATENTS.
+ *  All contributing  project authors may be  found in the AUTHORS  file in
+ *  the root of the source tree.
+ */
+
+#include "vp9/vp9_iface_common.h"
+void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
+                     void *user_priv) {
+  /** vpx_img_wrap() doesn't allow specifying independent strides for
+   * the Y, U, and V planes, nor other alignment adjustments that
+   * might be representable by a YV12_BUFFER_CONFIG, so we just
+   * initialize all the fields.*/
+  int bps;
+  if (!yv12->subsampling_y) {
+    if (!yv12->subsampling_x) {
+      img->fmt = VPX_IMG_FMT_I444;
+      bps = 24;
+    } else {
+      img->fmt = VPX_IMG_FMT_I422;
+      bps = 16;
+    }
+  } else {
+    if (!yv12->subsampling_x) {
+      img->fmt = VPX_IMG_FMT_I440;
+      bps = 16;
+    } else {
+      img->fmt = VPX_IMG_FMT_I420;
+      bps = 12;
+    }
+  }
+  img->cs = yv12->color_space;
+  img->range = yv12->color_range;
+  img->bit_depth = 8;
+  img->w = yv12->y_stride;
+  img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
+  img->d_w = yv12->y_crop_width;
+  img->d_h = yv12->y_crop_height;
+  img->r_w = yv12->render_width;
+  img->r_h = yv12->render_height;
+  img->x_chroma_shift = yv12->subsampling_x;
+  img->y_chroma_shift = yv12->subsampling_y;
+  img->planes[VPX_PLANE_Y] = yv12->y_buffer;
+  img->planes[VPX_PLANE_U] = yv12->u_buffer;
+  img->planes[VPX_PLANE_V] = yv12->v_buffer;
+  img->planes[VPX_PLANE_ALPHA] = NULL;
+  img->stride[VPX_PLANE_Y] = yv12->y_stride;
+  img->stride[VPX_PLANE_U] = yv12->uv_stride;
+  img->stride[VPX_PLANE_V] = yv12->uv_stride;
+  img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) {
+    // vpx_image_t uses byte strides and a pointer to the first byte
+    // of the image.
+    img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH);
+    img->bit_depth = yv12->bit_depth;
+    img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer);
+    img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer);
+    img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer);
+    img->planes[VPX_PLANE_ALPHA] = NULL;
+    img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride;
+    img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride;
+    img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride;
+    img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  img->bps = bps;
+  img->user_priv = user_priv;
+  img->img_data = yv12->buffer_alloc;
+  img->img_data_owner = 0;
+  img->self_allocd = 0;
+}
+
+vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
+                                YV12_BUFFER_CONFIG *yv12) {
+  yv12->y_buffer = img->planes[VPX_PLANE_Y];
+  yv12->u_buffer = img->planes[VPX_PLANE_U];
+  yv12->v_buffer = img->planes[VPX_PLANE_V];
+
+  yv12->y_crop_width = img->d_w;
+  yv12->y_crop_height = img->d_h;
+  yv12->render_width = img->r_w;
+  yv12->render_height = img->r_h;
+  yv12->y_width = img->d_w;
+  yv12->y_height = img->d_h;
+
+  yv12->uv_width =
+      img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width;
+  yv12->uv_height =
+      img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height;
+  yv12->uv_crop_width = yv12->uv_width;
+  yv12->uv_crop_height = yv12->uv_height;
+
+  yv12->y_stride = img->stride[VPX_PLANE_Y];
+  yv12->uv_stride = img->stride[VPX_PLANE_U];
+  yv12->color_space = img->cs;
+  yv12->color_range = img->range;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
+    // In vpx_image_t
+    //     planes point to uint8 address of start of data
+    //     stride counts uint8s to reach next row
+    // In YV12_BUFFER_CONFIG
+    //     y_buffer, u_buffer, v_buffer point to uint16 address of data
+    //     stride and border counts in uint16s
+    // This means that all the address calculations in the main body of code
+    // should work correctly.
+    // However, before we do any pixel operations we need to cast the address
+    // to a uint16 ponter and double its value.
+    yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer);
+    yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer);
+    yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer);
+    yv12->y_stride >>= 1;
+    yv12->uv_stride >>= 1;
+    yv12->flags = YV12_FLAG_HIGHBITDEPTH;
+  } else {
+    yv12->flags = 0;
+  }
+  yv12->border = (yv12->y_stride - img->w) / 2;
+#else
+  yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  yv12->subsampling_x = img->x_chroma_shift;
+  yv12->subsampling_y = img->y_chroma_shift;
+  return VPX_CODEC_OK;
+}
diff --git a/libvpx/vp9/vp9_iface_common.h b/libvpx/vp9/vp9_iface_common.h
index a1921db63..e646917c6 100644
--- a/libvpx/vp9/vp9_iface_common.h
+++ b/libvpx/vp9/vp9_iface_common.h
@@ -10,130 +10,24 @@
 #ifndef VPX_VP9_VP9_IFACE_COMMON_H_
 #define VPX_VP9_VP9_IFACE_COMMON_H_
 
+#include <assert.h>
 #include "vpx_ports/mem.h"
+#include "vpx/vp8.h"
+#include "vpx_scale/yv12config.h"
+#include "common/vp9_enums.h"
 
-static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
-                            void *user_priv) {
-  /** vpx_img_wrap() doesn't allow specifying independent strides for
-   * the Y, U, and V planes, nor other alignment adjustments that
-   * might be representable by a YV12_BUFFER_CONFIG, so we just
-   * initialize all the fields.*/
-  int bps;
-  if (!yv12->subsampling_y) {
-    if (!yv12->subsampling_x) {
-      img->fmt = VPX_IMG_FMT_I444;
-      bps = 24;
-    } else {
-      img->fmt = VPX_IMG_FMT_I422;
-      bps = 16;
-    }
-  } else {
-    if (!yv12->subsampling_x) {
-      img->fmt = VPX_IMG_FMT_I440;
-      bps = 16;
-    } else {
-      img->fmt = VPX_IMG_FMT_I420;
-      bps = 12;
-    }
-  }
-  img->cs = yv12->color_space;
-  img->range = yv12->color_range;
-  img->bit_depth = 8;
-  img->w = yv12->y_stride;
-  img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
-  img->d_w = yv12->y_crop_width;
-  img->d_h = yv12->y_crop_height;
-  img->r_w = yv12->render_width;
-  img->r_h = yv12->render_height;
-  img->x_chroma_shift = yv12->subsampling_x;
-  img->y_chroma_shift = yv12->subsampling_y;
-  img->planes[VPX_PLANE_Y] = yv12->y_buffer;
-  img->planes[VPX_PLANE_U] = yv12->u_buffer;
-  img->planes[VPX_PLANE_V] = yv12->v_buffer;
-  img->planes[VPX_PLANE_ALPHA] = NULL;
-  img->stride[VPX_PLANE_Y] = yv12->y_stride;
-  img->stride[VPX_PLANE_U] = yv12->uv_stride;
-  img->stride[VPX_PLANE_V] = yv12->uv_stride;
-  img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) {
-    // vpx_image_t uses byte strides and a pointer to the first byte
-    // of the image.
-    img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH);
-    img->bit_depth = yv12->bit_depth;
-    img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer);
-    img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer);
-    img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer);
-    img->planes[VPX_PLANE_ALPHA] = NULL;
-    img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride;
-    img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride;
-    img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride;
-    img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride;
-  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  img->bps = bps;
-  img->user_priv = user_priv;
-  img->img_data = yv12->buffer_alloc;
-  img->img_data_owner = 0;
-  img->self_allocd = 0;
-}
-
-static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
-                                       YV12_BUFFER_CONFIG *yv12) {
-  yv12->y_buffer = img->planes[VPX_PLANE_Y];
-  yv12->u_buffer = img->planes[VPX_PLANE_U];
-  yv12->v_buffer = img->planes[VPX_PLANE_V];
-
-  yv12->y_crop_width = img->d_w;
-  yv12->y_crop_height = img->d_h;
-  yv12->render_width = img->r_w;
-  yv12->render_height = img->r_h;
-  yv12->y_width = img->d_w;
-  yv12->y_height = img->d_h;
+#ifdef __cplusplus
+extern "C" {
+#endif
 
-  yv12->uv_width =
-      img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width;
-  yv12->uv_height =
-      img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height;
-  yv12->uv_crop_width = yv12->uv_width;
-  yv12->uv_crop_height = yv12->uv_height;
+void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
+                     void *user_priv);
 
-  yv12->y_stride = img->stride[VPX_PLANE_Y];
-  yv12->uv_stride = img->stride[VPX_PLANE_U];
-  yv12->color_space = img->cs;
-  yv12->color_range = img->range;
+vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
+                                YV12_BUFFER_CONFIG *yv12);
 
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
-    // In vpx_image_t
-    //     planes point to uint8 address of start of data
-    //     stride counts uint8s to reach next row
-    // In YV12_BUFFER_CONFIG
-    //     y_buffer, u_buffer, v_buffer point to uint16 address of data
-    //     stride and border counts in uint16s
-    // This means that all the address calculations in the main body of code
-    // should work correctly.
-    // However, before we do any pixel operations we need to cast the address
-    // to a uint16 ponter and double its value.
-    yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer);
-    yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer);
-    yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer);
-    yv12->y_stride >>= 1;
-    yv12->uv_stride >>= 1;
-    yv12->flags = YV12_FLAG_HIGHBITDEPTH;
-  } else {
-    yv12->flags = 0;
-  }
-  yv12->border = (yv12->y_stride - img->w) / 2;
-#else
-  yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  yv12->subsampling_x = img->x_chroma_shift;
-  yv12->subsampling_y = img->y_chroma_shift;
-  return VPX_CODEC_OK;
-}
-
-static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
+static INLINE VP9_REFFRAME
+ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
   switch (frame) {
     case VP8_LAST_FRAME: return VP9_LAST_FLAG;
     case VP8_GOLD_FRAME: return VP9_GOLD_FLAG;
@@ -142,4 +36,9 @@ static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
   assert(0 && "Invalid Reference Frame");
   return VP9_LAST_FLAG;
 }
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VPX_VP9_VP9_IFACE_COMMON_H_
diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk
index 736ff0170..ad774505c 100644
--- a/libvpx/vp9/vp9cx.mk
+++ b/libvpx/vp9/vp9cx.mk
@@ -16,6 +16,10 @@ VP9_CX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes)
 VP9_CX_SRCS_REMOVE-no  += $(VP9_COMMON_SRCS_REMOVE-no)
 
 VP9_CX_SRCS-yes += vp9_cx_iface.c
+VP9_CX_SRCS-yes += vp9_cx_iface.h
+
+VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.cc
+VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.h
 
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
 VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
@@ -76,6 +80,8 @@ VP9_CX_SRCS-yes += encoder/vp9_resize.c
 VP9_CX_SRCS-yes += encoder/vp9_resize.h
 VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.c
 VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.h
+VP9_CX_SRCS-$(CONFIG_NON_GREEDY_MV) += encoder/vp9_non_greedy_mv.c
+VP9_CX_SRCS-$(CONFIG_NON_GREEDY_MV) += encoder/vp9_non_greedy_mv.h
 
 VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
 VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
@@ -116,7 +122,7 @@ endif
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
 
-ifeq ($(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86_64),yes)
 VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
 endif