Update external/libvpx to v1.6.0

Change-Id: I9425a3d3c3524d43823bc89f9f03556420c3dd42
author: Johann <johannkoenig@google.com> 2016-07-21 12:09:52 -0700
committer: Johann <johannkoenig@google.com> 2016-07-21 12:09:52 -0700
commit: 68e1c830ade592be74773e249bf94e2bbfb50de7 (patch)
tree: 08299f7deb6079690f0a3d2118ef3882fa77bdc6 /libvpx/vp9/common
parent: 96ebd06cb9832f583f7c181ec886eade209524df (diff)
download: libvpx-68e1c830ade592be74773e249bf94e2bbfb50de7.tar.gz
32 files changed, 654 insertions, 833 deletions
diff --git a/libvpx/vp9/common/vp9_alloccommon.c b/libvpx/vp9/common/vp9_alloccommon.c
index 24c6c54ed..7dd1005d3 100644
--- a/libvpx/vp9/common/vp9_alloccommon.c
+++ b/libvpx/vp9/common/vp9_alloccommon.c
@@ -119,6 +119,20 @@ void vp9_free_context_buffers(VP9_COMMON *cm) {
   cm->lf.lfm = NULL;
 }
 
+
+int vp9_alloc_loop_filter(VP9_COMMON *cm) {
+  vpx_free(cm->lf.lfm);
+  // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region.  The
+  // stride and rows are rounded up / truncated to a multiple of 8.
+  cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3;
+  cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc(
+      ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride,
+      sizeof(*cm->lf.lfm));
+  if (!cm->lf.lfm)
+    return 1;
+  return 0;
+}
+
 int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
   int new_mi_size;
 
@@ -151,15 +165,8 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
     cm->above_context_alloc_cols = cm->mi_cols;
   }
 
-  vpx_free(cm->lf.lfm);
-
-  // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region.  The
-  // stride and rows are rounded up / truncated to a multiple of 8.
-  cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3;
-  cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc(
-      ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride,
-      sizeof(*cm->lf.lfm));
-  if (!cm->lf.lfm) goto fail;
+  if (vp9_alloc_loop_filter(cm))
+    goto fail;
 
   return 0;
 
diff --git a/libvpx/vp9/common/vp9_alloccommon.h b/libvpx/vp9/common/vp9_alloccommon.h
index c0e51a6ce..e53955b99 100644
--- a/libvpx/vp9/common/vp9_alloccommon.h
+++ b/libvpx/vp9/common/vp9_alloccommon.h
@@ -23,6 +23,7 @@ struct BufferPool;
 
 void vp9_remove_common(struct VP9Common *cm);
 
+int vp9_alloc_loop_filter(struct VP9Common *cm);
 int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height);
 void vp9_init_context_buffers(struct VP9Common *cm);
 void vp9_free_context_buffers(struct VP9Common *cm);
diff --git a/libvpx/vp9/common/vp9_blockd.c b/libvpx/vp9/common/vp9_blockd.c
index 0e104ee59..7bab27d4f 100644
--- a/libvpx/vp9/common/vp9_blockd.c
+++ b/libvpx/vp9/common/vp9_blockd.c
@@ -13,7 +13,7 @@
 PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi,
                                     const MODE_INFO *left_mi, int b) {
   if (b == 0 || b == 2) {
-    if (!left_mi || is_inter_block(&left_mi->mbmi))
+    if (!left_mi || is_inter_block(left_mi))
       return DC_PRED;
 
     return get_y_mode(left_mi, b + 1);
@@ -26,7 +26,7 @@ PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi,
 PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi,
                                      const MODE_INFO *above_mi, int b) {
   if (b == 0 || b == 1) {
-    if (!above_mi || is_inter_block(&above_mi->mbmi))
+    if (!above_mi || is_inter_block(above_mi))
       return DC_PRED;
 
     return get_y_mode(above_mi, b + 2);
@@ -40,12 +40,12 @@ void vp9_foreach_transformed_block_in_plane(
     const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
     foreach_transformed_block_visitor visit, void *arg) {
   const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi;
+  const MODE_INFO* mi = xd->mi[0];
   // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
   // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
   // transform size varies per plane, look it up in a common way.
-  const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd)
-                                : mbmi->tx_size;
+  const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd)
+                                : mi->tx_size;
   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h
index 61eb59162..3d26fb2b5 100644
--- a/libvpx/vp9/common/vp9_blockd.h
+++ b/libvpx/vp9/common/vp9_blockd.h
@@ -64,7 +64,7 @@ typedef struct {
 typedef int8_t MV_REFERENCE_FRAME;
 
 // This structure now relates to 8x8 block regions.
-typedef struct {
+typedef struct MODE_INFO {
   // Common for both INTER and INTRA blocks
   BLOCK_SIZE sb_type;
   PREDICTION_MODE mode;
@@ -82,24 +82,21 @@ typedef struct {
 
   // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
   int_mv mv[2];
-} MB_MODE_INFO;
 
-typedef struct MODE_INFO {
-  MB_MODE_INFO mbmi;
   b_mode_info bmi[4];
 } MODE_INFO;
 
 static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) {
-  return mi->mbmi.sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode
-                                      : mi->mbmi.mode;
+  return mi->sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode
+                                 : mi->mode;
 }
 
-static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
-  return mbmi->ref_frame[0] > INTRA_FRAME;
+static INLINE int is_inter_block(const MODE_INFO *mi) {
+  return mi->ref_frame[0] > INTRA_FRAME;
 }
 
-static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
-  return mbmi->ref_frame[1] > INTRA_FRAME;
+static INLINE int has_second_ref(const MODE_INFO *mi) {
+  return mi->ref_frame[1] > INTRA_FRAME;
 }
 
 PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi,
@@ -160,11 +157,9 @@ typedef struct macroblockd {
   MODE_INFO **mi;
   MODE_INFO *left_mi;
   MODE_INFO *above_mi;
-  MB_MODE_INFO *left_mbmi;
-  MB_MODE_INFO *above_mbmi;
 
-  int up_available;
-  int left_available;
+  unsigned int max_blocks_wide;
+  unsigned int max_blocks_high;
 
   const vpx_prob (*partition_probs)[PARTITION_TYPES - 1];
 
@@ -212,19 +207,19 @@ extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES];
 
 static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
                                   const MACROBLOCKD *xd) {
-  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const MODE_INFO *const mi = xd->mi[0];
 
-  if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mbmi))
+  if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi))
     return DCT_DCT;
 
-  return intra_mode_to_tx_type_lookup[mbmi->mode];
+  return intra_mode_to_tx_type_lookup[mi->mode];
 }
 
 static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
                                       const MACROBLOCKD *xd, int ib) {
   const MODE_INFO *const mi = xd->mi[0];
 
-  if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi))
+  if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi))
     return DCT_DCT;
 
   return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)];
@@ -242,9 +237,9 @@ static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize,
   }
 }
 
-static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi,
+static INLINE TX_SIZE get_uv_tx_size(const MODE_INFO *mi,
                                      const struct macroblockd_plane *pd) {
-  return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x,
+  return get_uv_tx_size_impl(mi->tx_size, mi->sb_type, pd->subsampling_x,
                              pd->subsampling_y);
 }
 
diff --git a/libvpx/vp9/common/vp9_common.h b/libvpx/vp9/common/vp9_common.h
index 76e7cd440..908fa80a3 100644
--- a/libvpx/vp9/common/vp9_common.h
+++ b/libvpx/vp9/common/vp9_common.h
@@ -67,7 +67,6 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
 
 #define VP9_FRAME_MARKER 0x2
 
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/libvpx/vp9/common/vp9_common_data.c b/libvpx/vp9/common/vp9_common_data.c
index a6dae6a1c..3409d0484 100644
--- a/libvpx/vp9/common/vp9_common_data.c
+++ b/libvpx/vp9/common/vp9_common_data.c
@@ -159,3 +159,18 @@ const struct {
   {0,  8 },  // 64X32 - {0b0000, 0b1000}
   {0,  0 },  // 64X64 - {0b0000, 0b0000}
 };
+
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+const uint8_t need_top_left[INTRA_MODES] = {
+    0,  // DC_PRED
+    0,  // V_PRED
+    0,  // H_PRED
+    0,  // D45_PRED
+    1,  // D135_PRED
+    1,  // D117_PRED
+    1,  // D153_PRED
+    0,  // D207_PRED
+    0,  // D63_PRED
+    1,  // TM_PRED
+};
+#endif  // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
diff --git a/libvpx/vp9/common/vp9_common_data.h b/libvpx/vp9/common/vp9_common_data.h
index 95a117961..0ae24dad5 100644
--- a/libvpx/vp9/common/vp9_common_data.h
+++ b/libvpx/vp9/common/vp9_common_data.h
@@ -33,6 +33,9 @@ extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
 extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES];
 extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
 extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+extern const uint8_t need_top_left[INTRA_MODES];
+#endif  // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/libvpx/vp9/common/vp9_debugmodes.c b/libvpx/vp9/common/vp9_debugmodes.c
index 3d80103d2..d9c1fd968 100644
--- a/libvpx/vp9/common/vp9_debugmodes.c
+++ b/libvpx/vp9/common/vp9_debugmodes.c
@@ -35,7 +35,7 @@ static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor,
     fprintf(file, "%c ", prefix);
     for (mi_col = 0; mi_col < cols; mi_col++) {
       fprintf(file, "%2d ",
-              *((int*) ((char *) (&mi[0]->mbmi) +
+              *((int*) ((char *) (mi[0]) +
                                   member_offset)));
       mi++;
     }
@@ -53,18 +53,18 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) {
   int rows = cm->mi_rows;
   int cols = cm->mi_cols;
 
-  print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type));
-  print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode));
-  print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0]));
-  print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
-  print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
+  print_mi_data(cm, mvs, "Partitions:", offsetof(MODE_INFO, sb_type));
+  print_mi_data(cm, mvs, "Modes:", offsetof(MODE_INFO, mode));
+  print_mi_data(cm, mvs, "Ref frame:", offsetof(MODE_INFO, ref_frame[0]));
+  print_mi_data(cm, mvs, "Transform:", offsetof(MODE_INFO, tx_size));
+  print_mi_data(cm, mvs, "UV Modes:", offsetof(MODE_INFO, uv_mode));
 
   // output skip infomation.
   log_frame_info(cm, "Skips:", mvs);
   for (mi_row = 0; mi_row < rows; mi_row++) {
     fprintf(mvs, "S ");
     for (mi_col = 0; mi_col < cols; mi_col++) {
-      fprintf(mvs, "%2d ", mi[0]->mbmi.skip);
+      fprintf(mvs, "%2d ", mi[0]->skip);
       mi++;
     }
     fprintf(mvs, "\n");
@@ -78,8 +78,8 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) {
   for (mi_row = 0; mi_row < rows; mi_row++) {
     fprintf(mvs, "V ");
     for (mi_col = 0; mi_col < cols; mi_col++) {
-      fprintf(mvs, "%4d:%4d ", mi[0]->mbmi.mv[0].as_mv.row,
-                               mi[0]->mbmi.mv[0].as_mv.col);
+      fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row,
+                               mi[0]->mv[0].as_mv.col);
       mi++;
     }
     fprintf(mvs, "\n");
diff --git a/libvpx/vp9/common/vp9_entropy.c b/libvpx/vp9/common/vp9_entropy.c
index 579857bc9..7b490af34 100644
--- a/libvpx/vp9/common/vp9_entropy.c
+++ b/libvpx/vp9/common/vp9_entropy.c
@@ -36,20 +36,6 @@ const vpx_prob vp9_cat6_prob[] = {
     254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
 };
 #if CONFIG_VP9_HIGHBITDEPTH
-const vpx_prob vp9_cat1_prob_high10[] = { 159 };
-const vpx_prob vp9_cat2_prob_high10[] = { 165, 145 };
-const vpx_prob vp9_cat3_prob_high10[] = { 173, 148, 140 };
-const vpx_prob vp9_cat4_prob_high10[] = { 176, 155, 140, 135 };
-const vpx_prob vp9_cat5_prob_high10[] = { 180, 157, 141, 134, 130 };
-const vpx_prob vp9_cat6_prob_high10[] = {
-    255, 255, 254, 254, 254, 252, 249, 243,
-    230, 196, 177, 153, 140, 133, 130, 129
-};
-const vpx_prob vp9_cat1_prob_high12[] = { 159 };
-const vpx_prob vp9_cat2_prob_high12[] = { 165, 145 };
-const vpx_prob vp9_cat3_prob_high12[] = { 173, 148, 140 };
-const vpx_prob vp9_cat4_prob_high12[] = { 176, 155, 140, 135 };
-const vpx_prob vp9_cat5_prob_high12[] = { 180, 157, 141, 134, 130 };
 const vpx_prob vp9_cat6_prob_high12[] = {
     255, 255, 255, 255, 254, 254, 254, 252, 249,
     243, 230, 196, 177, 153, 140, 133, 130, 129
@@ -403,7 +389,6 @@ const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = {
   {255, 241, 243, 255, 236, 255, 252, 254},
   {255, 243, 245, 255, 237, 255, 252, 254},
   {255, 246, 247, 255, 239, 255, 253, 255},
-  {255, 246, 247, 255, 239, 255, 253, 255},
 };
 
 static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = {
@@ -743,8 +728,8 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = {
 };
 
 static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) {
-  memcpy(probs, vp9_pareto8_full[p = 0 ? 0 : p - 1],
-         MODEL_NODES * sizeof(vpx_prob));
+  assert(p != 0);
+  memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob));
 }
 
 void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) {
diff --git a/libvpx/vp9/common/vp9_entropy.h b/libvpx/vp9/common/vp9_entropy.h
index 21611ed6d..63b3bff5d 100644
--- a/libvpx/vp9/common/vp9_entropy.h
+++ b/libvpx/vp9/common/vp9_entropy.h
@@ -138,7 +138,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
 // 1, 3, 5, 7, ..., 253, 255
 // In between probabilities are interpolated linearly
 
-#define COEFF_PROB_MODELS 256
+#define COEFF_PROB_MODELS 255
 
 #define UNCONSTRAINED_NODES         3
 
diff --git a/libvpx/vp9/common/vp9_entropymv.c b/libvpx/vp9/common/vp9_entropymv.c
index 3acfe1448..566ae91cf 100644
--- a/libvpx/vp9/common/vp9_entropymv.c
+++ b/libvpx/vp9/common/vp9_entropymv.c
@@ -11,9 +11,6 @@
 #include "vp9/common/vp9_onyxc_int.h"
 #include "vp9/common/vp9_entropymv.h"
 
-// Integer pel reference mv threshold for use of high-precision 1/8 mv
-#define COMPANDED_MVREF_THRESH 8
-
 const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = {
   -MV_JOINT_ZERO, 2,
   -MV_JOINT_HNZVZ, 4,
@@ -127,11 +124,6 @@ MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) {
   return c;
 }
 
-int vp9_use_mv_hp(const MV *ref) {
-  return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH &&
-         (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH;
-}
-
 static void inc_mv_component(int v, nmv_component_counts *comp_counts,
                              int incr, int usehp) {
   int s, z, c, o, d, e, f;
diff --git a/libvpx/vp9/common/vp9_entropymv.h b/libvpx/vp9/common/vp9_entropymv.h
index 8c817bf7b..2f05ad44b 100644
--- a/libvpx/vp9/common/vp9_entropymv.h
+++ b/libvpx/vp9/common/vp9_entropymv.h
@@ -27,7 +27,14 @@ struct VP9Common;
 void vp9_init_mv_probs(struct VP9Common *cm);
 
 void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp);
-int vp9_use_mv_hp(const MV *ref);
+
+// Integer pel reference mv threshold for use of high-precision 1/8 mv
+#define COMPANDED_MVREF_THRESH 8
+
+static INLINE int use_mv_hp(const MV *ref) {
+  return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH &&
+         (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH;
+}
 
 #define MV_UPDATE_PROB 252
 
diff --git a/libvpx/vp9/common/vp9_idct.c b/libvpx/vp9/common/vp9_idct.c
index d12cd76db..1b420143b 100644
--- a/libvpx/vp9/common/vp9_idct.c
+++ b/libvpx/vp9/common/vp9_idct.c
@@ -174,6 +174,9 @@ void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
   else if (eob <= 34)
     // non-zero coeff only in upper-left 8x8
     vpx_idct32x32_34_add(input, dest, stride);
+  else if (eob <= 135)
+    // non-zero coeff only in upper-left 16x16
+    vpx_idct32x32_135_add(input, dest, stride);
   else
     vpx_idct32x32_1024_add(input, dest, stride);
 }
diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c
index b8a113223..183dec4e7 100644
--- a/libvpx/vp9/common/vp9_loopfilter.c
+++ b/libvpx/vp9/common/vp9_loopfilter.c
@@ -232,9 +232,9 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
 }
 
 static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
-                                const MB_MODE_INFO *mbmi) {
-  return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]]
-                   [mode_lf_lut[mbmi->mode]];
+                                const MODE_INFO *mi) {
+  return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]]
+                   [mode_lf_lut[mi->mode]];
 }
 
 void vp9_loop_filter_init(VP9_COMMON *cm) {
@@ -298,200 +298,168 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
 
 static void filter_selectively_vert_row2(int subsampling_factor,
                                          uint8_t *s, int pitch,
-                                         unsigned int mask_16x16_l,
-                                         unsigned int mask_8x8_l,
-                                         unsigned int mask_4x4_l,
-                                         unsigned int mask_4x4_int_l,
-                                         const loop_filter_info_n *lfi_n,
+                                         unsigned int mask_16x16,
+                                         unsigned int mask_8x8,
+                                         unsigned int mask_4x4,
+                                         unsigned int mask_4x4_int,
+                                         const loop_filter_thresh *lfthr,
                                          const uint8_t *lfl) {
-  const int mask_shift = subsampling_factor ? 4 : 8;
-  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+  const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
   const int lfl_forward = subsampling_factor ? 4 : 8;
-
-  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
-  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
-  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
-  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
-  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+  const unsigned int dual_one = 1 | (1 << lfl_forward);
   unsigned int mask;
-
-  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
-              mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
-       mask; mask >>= 1) {
-    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
-    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
-
-    // TODO(yunqingwang): count in loopfilter functions should be removed.
-    if (mask & 1) {
-      if ((mask_16x16_0 | mask_16x16_1) & 1) {
-        if ((mask_16x16_0 & mask_16x16_1) & 1) {
-          vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                   lfi0->hev_thr);
-        } else if (mask_16x16_0 & 1) {
-          vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
-                              lfi0->hev_thr);
+  uint8_t *ss[2];
+  ss[0] = s;
+
+  for (mask =
+           (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
+       mask; mask = (mask & ~dual_one) >> 1) {
+    if (mask & dual_one) {
+      const loop_filter_thresh *lfis[2];
+      lfis[0] = lfthr + *lfl;
+      lfis[1] = lfthr + *(lfl + lfl_forward);
+      ss[1] = ss[0] + 8 * pitch;
+
+      if (mask_16x16 & dual_one) {
+        if ((mask_16x16 & dual_one) == dual_one) {
+          vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+                                   lfis[0]->hev_thr);
         } else {
-          vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
-                              lfi1->lim, lfi1->hev_thr);
+          const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
+          vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
+                              lfi->lim, lfi->hev_thr);
         }
       }
 
-      if ((mask_8x8_0 | mask_8x8_1) & 1) {
-        if ((mask_8x8_0 & mask_8x8_1) & 1) {
-          vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                  lfi1->hev_thr);
-        } else if (mask_8x8_0 & 1) {
-          vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
-                             1);
+      if (mask_8x8 & dual_one) {
+        if ((mask_8x8 & dual_one) == dual_one) {
+          vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+                                  lfis[0]->hev_thr, lfis[1]->mblim,
+                                  lfis[1]->lim, lfis[1]->hev_thr);
         } else {
-          vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
-                             lfi1->hev_thr, 1);
+          const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
+          vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim,
+                             lfi->hev_thr);
         }
       }
 
-      if ((mask_4x4_0 | mask_4x4_1) & 1) {
-        if ((mask_4x4_0 & mask_4x4_1) & 1) {
-          vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                  lfi1->hev_thr);
-        } else if (mask_4x4_0 & 1) {
-          vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
-                             1);
+      if (mask_4x4 & dual_one) {
+        if ((mask_4x4 & dual_one) == dual_one) {
+          vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+                                  lfis[0]->hev_thr, lfis[1]->mblim,
+                                  lfis[1]->lim, lfis[1]->hev_thr);
         } else {
-          vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
-                             lfi1->hev_thr, 1);
+          const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
+          vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim,
+                             lfi->hev_thr);
         }
       }
 
-      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
-        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
-          vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                  lfi1->hev_thr);
-        } else if (mask_4x4_int_0 & 1) {
-          vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                             lfi0->hev_thr, 1);
+      if (mask_4x4_int & dual_one) {
+        if ((mask_4x4_int & dual_one) == dual_one) {
+          vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
+                                  lfis[0]->lim, lfis[0]->hev_thr,
+                                  lfis[1]->mblim, lfis[1]->lim,
+                                  lfis[1]->hev_thr);
         } else {
-          vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
-                             lfi1->hev_thr, 1);
+          const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
+          vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim,
+                             lfi->lim, lfi->hev_thr);
         }
       }
     }
 
-    s += 8;
+    ss[0] += 8;
     lfl += 1;
-    mask_16x16_0 >>= 1;
-    mask_8x8_0 >>= 1;
-    mask_4x4_0 >>= 1;
-    mask_4x4_int_0 >>= 1;
-    mask_16x16_1 >>= 1;
-    mask_8x8_1 >>= 1;
-    mask_4x4_1 >>= 1;
-    mask_4x4_int_1 >>= 1;
+    mask_16x16 >>= 1;
+    mask_8x8 >>= 1;
+    mask_4x4 >>= 1;
+    mask_4x4_int >>= 1;
   }
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
 static void highbd_filter_selectively_vert_row2(int subsampling_factor,
                                                 uint16_t *s, int pitch,
-                                                unsigned int mask_16x16_l,
-                                                unsigned int mask_8x8_l,
-                                                unsigned int mask_4x4_l,
-                                                unsigned int mask_4x4_int_l,
-                                                const loop_filter_info_n *lfi_n,
+                                                unsigned int mask_16x16,
+                                                unsigned int mask_8x8,
+                                                unsigned int mask_4x4,
+                                                unsigned int mask_4x4_int,
+                                                const loop_filter_thresh *lfthr,
                                                 const uint8_t *lfl, int bd) {
-  const int mask_shift = subsampling_factor ? 4 : 8;
-  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+  const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
   const int lfl_forward = subsampling_factor ? 4 : 8;
-
-  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
-  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
-  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
-  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
-  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+  const unsigned int dual_one = 1 | (1 << lfl_forward);
   unsigned int mask;
-
-  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
-       mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
-       mask; mask >>= 1) {
-    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
-    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
-
-    // TODO(yunqingwang): count in loopfilter functions should be removed.
-    if (mask & 1) {
-      if ((mask_16x16_0 | mask_16x16_1) & 1) {
-        if ((mask_16x16_0 & mask_16x16_1) & 1) {
-          vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                          lfi0->hev_thr, bd);
-        } else if (mask_16x16_0 & 1) {
-          vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
-                                     lfi0->hev_thr, bd);
+  uint16_t *ss[2];
+  ss[0] = s;
+
+  for (mask =
+           (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
+       mask; mask = (mask & ~dual_one) >> 1) {
+    if (mask & dual_one) {
+      const loop_filter_thresh *lfis[2];
+      lfis[0] = lfthr + *lfl;
+      lfis[1] = lfthr + *(lfl + lfl_forward);
+      ss[1] = ss[0] + 8 * pitch;
+
+      if (mask_16x16 & dual_one) {
+        if ((mask_16x16 & dual_one) == dual_one) {
+          vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim,
+                                          lfis[0]->lim, lfis[0]->hev_thr, bd);
         } else {
-          vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
-                                     lfi1->lim, lfi1->hev_thr, bd);
+          const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
+          vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
+                                     lfi->lim, lfi->hev_thr, bd);
         }
       }
 
-      if ((mask_8x8_0 | mask_8x8_1) & 1) {
-        if ((mask_8x8_0 & mask_8x8_1) & 1) {
-          vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                         lfi1->hev_thr, bd);
-        } else if (mask_8x8_0 & 1) {
-          vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
-                                    lfi0->hev_thr, 1, bd);
+      if (mask_8x8 & dual_one) {
+        if ((mask_8x8 & dual_one) == dual_one) {
+          vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim,
+                                         lfis[0]->lim, lfis[0]->hev_thr,
+                                         lfis[1]->mblim, lfis[1]->lim,
+                                         lfis[1]->hev_thr, bd);
         } else {
-          vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
-                                    lfi1->lim, lfi1->hev_thr, 1, bd);
+          const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
+          vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim,
+                                    lfi->lim, lfi->hev_thr, bd);
         }
       }
 
-      if ((mask_4x4_0 | mask_4x4_1) & 1) {
-        if ((mask_4x4_0 & mask_4x4_1) & 1) {
-          vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                         lfi1->hev_thr, bd);
-        } else if (mask_4x4_0 & 1) {
-          vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
-                                    lfi0->hev_thr, 1, bd);
+      if (mask_4x4 & dual_one) {
+        if ((mask_4x4 & dual_one) == dual_one) {
+          vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim,
+                                         lfis[0]->lim, lfis[0]->hev_thr,
+                                         lfis[1]->mblim, lfis[1]->lim,
+                                         lfis[1]->hev_thr, bd);
         } else {
-          vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
-                                    lfi1->lim, lfi1->hev_thr, 1, bd);
+          const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
+          vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim,
+                                    lfi->lim, lfi->hev_thr, bd);
         }
       }
 
-      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
-        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
-          vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                         lfi1->hev_thr, bd);
-        } else if (mask_4x4_int_0 & 1) {
-          vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                                    lfi0->hev_thr, 1, bd);
+      if (mask_4x4_int & dual_one) {
+        if ((mask_4x4_int & dual_one) == dual_one) {
+          vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
+                                         lfis[0]->lim, lfis[0]->hev_thr,
+                                         lfis[1]->mblim, lfis[1]->lim,
+                                         lfis[1]->hev_thr, bd);
         } else {
-          vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
-                                    lfi1->lim, lfi1->hev_thr, 1, bd);
+          const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
+          vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch,
+                                    lfi->mblim, lfi->lim, lfi->hev_thr, bd);
         }
       }
     }
 
-    s += 8;
+    ss[0] += 8;
     lfl += 1;
-    mask_16x16_0 >>= 1;
-    mask_8x8_0 >>= 1;
-    mask_4x4_0 >>= 1;
-    mask_4x4_int_0 >>= 1;
-    mask_16x16_1 >>= 1;
-    mask_8x8_1 >>= 1;
-    mask_4x4_1 >>= 1;
-    mask_4x4_int_1 >>= 1;
+    mask_16x16 >>= 1;
+    mask_8x8 >>= 1;
+    mask_4x4 >>= 1;
+    mask_4x4_int >>= 1;
   }
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -501,30 +469,30 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
                                      unsigned int mask_8x8,
                                      unsigned int mask_4x4,
                                      unsigned int mask_4x4_int,
-                                     const loop_filter_info_n *lfi_n,
+                                     const loop_filter_thresh *lfthr,
                                      const uint8_t *lfl) {
   unsigned int mask;
   int count;
 
   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
        mask; mask >>= count) {
-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
-
     count = 1;
     if (mask & 1) {
+      const loop_filter_thresh *lfi = lfthr + *lfl;
+
       if (mask_16x16 & 1) {
         if ((mask_16x16 & 3) == 3) {
-          vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
-                                lfi->hev_thr, 2);
+          vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+                                     lfi->hev_thr);
           count = 2;
         } else {
-          vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
-                                lfi->hev_thr, 1);
+          vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+                                    lfi->hev_thr);
         }
       } else if (mask_8x8 & 1) {
         if ((mask_8x8 & 3) == 3) {
           // Next block's thresholds.
-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
 
           vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -537,23 +505,23 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
           } else {
             if (mask_4x4_int & 1)
               vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                   lfi->hev_thr, 1);
+                                   lfi->hev_thr);
             else if (mask_4x4_int & 2)
               vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
-                                   lfin->lim, lfin->hev_thr, 1);
+                                   lfin->lim, lfin->hev_thr);
           }
           count = 2;
         } else {
-          vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+          vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
 
           if (mask_4x4_int & 1)
             vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                 lfi->hev_thr, 1);
+                                 lfi->hev_thr);
         }
       } else if (mask_4x4 & 1) {
         if ((mask_4x4 & 3) == 3) {
           // Next block's thresholds.
-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
 
           vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -565,22 +533,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
           } else {
             if (mask_4x4_int & 1)
               vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                   lfi->hev_thr, 1);
+                                   lfi->hev_thr);
             else if (mask_4x4_int & 2)
               vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
-                                   lfin->lim, lfin->hev_thr, 1);
+                                   lfin->lim, lfin->hev_thr);
           }
           count = 2;
         } else {
-          vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+          vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
 
           if (mask_4x4_int & 1)
             vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                 lfi->hev_thr, 1);
+                                 lfi->hev_thr);
         }
-      } else if (mask_4x4_int & 1) {
+      } else {
         vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                             lfi->hev_thr, 1);
+                             lfi->hev_thr);
       }
     }
     s += 8 * count;
@@ -598,30 +566,30 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
                                             unsigned int mask_8x8,
                                             unsigned int mask_4x4,
                                             unsigned int mask_4x4_int,
-                                            const loop_filter_info_n *lfi_n,
+                                            const loop_filter_thresh *lfthr,
                                             const uint8_t *lfl, int bd) {
   unsigned int mask;
   int count;
 
   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
        mask; mask >>= count) {
-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
-
     count = 1;
     if (mask & 1) {
+      const loop_filter_thresh *lfi = lfthr + *lfl;
+
       if (mask_16x16 & 1) {
         if ((mask_16x16 & 3) == 3) {
-          vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
-                                       lfi->hev_thr, 2, bd);
+          vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+                                            lfi->hev_thr, bd);
           count = 2;
         } else {
-          vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
-                                       lfi->hev_thr, 1, bd);
+          vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+                                           lfi->hev_thr, bd);
         }
       } else if (mask_8x8 & 1) {
         if ((mask_8x8 & 3) == 3) {
           // Next block's thresholds.
-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
 
           vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                            lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -635,26 +603,26 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
           } else {
             if (mask_4x4_int & 1) {
               vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
-                                          lfi->lim, lfi->hev_thr, 1, bd);
+                                          lfi->lim, lfi->hev_thr, bd);
             } else if (mask_4x4_int & 2) {
               vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
-                                          lfin->lim, lfin->hev_thr, 1, bd);
+                                          lfin->lim, lfin->hev_thr, bd);
             }
           }
           count = 2;
         } else {
           vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
-                                      lfi->hev_thr, 1, bd);
+                                      lfi->hev_thr, bd);
 
           if (mask_4x4_int & 1) {
             vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
-                                        lfi->lim, lfi->hev_thr, 1, bd);
+                                        lfi->lim, lfi->hev_thr, bd);
           }
         }
       } else if (mask_4x4 & 1) {
         if ((mask_4x4 & 3) == 3) {
           // Next block's thresholds.
-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
 
           vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                            lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -667,25 +635,25 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
           } else {
             if (mask_4x4_int & 1) {
               vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
-                                          lfi->lim, lfi->hev_thr, 1, bd);
+                                          lfi->lim, lfi->hev_thr, bd);
             } else if (mask_4x4_int & 2) {
               vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
-                                          lfin->lim, lfin->hev_thr, 1, bd);
+                                          lfin->lim, lfin->hev_thr, bd);
             }
           }
           count = 2;
         } else {
           vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
-                                      lfi->hev_thr, 1, bd);
+                                      lfi->hev_thr, bd);
 
           if (mask_4x4_int & 1) {
             vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
-                                        lfi->lim, lfi->hev_thr, 1, bd);
+                                        lfi->lim, lfi->hev_thr, bd);
           }
         }
-      } else if (mask_4x4_int & 1) {
+      } else {
         vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                    lfi->hev_thr, 1, bd);
+                                    lfi->hev_thr, bd);
       }
     }
     s += 8 * count;
@@ -704,16 +672,14 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
 // whether there were any coefficients encoded, and the loop filter strength
 // block we are currently looking at. Shift is used to position the
 // 1's we produce.
-// TODO(JBB) Need another function for different resolution color..
 static void build_masks(const loop_filter_info_n *const lfi_n,
                         const MODE_INFO *mi, const int shift_y,
                         const int shift_uv,
                         LOOP_FILTER_MASK *lfm) {
-  const MB_MODE_INFO *mbmi = &mi->mbmi;
-  const BLOCK_SIZE block_size = mbmi->sb_type;
-  const TX_SIZE tx_size_y = mbmi->tx_size;
+  const BLOCK_SIZE block_size = mi->sb_type;
+  const TX_SIZE tx_size_y = mi->tx_size;
   const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1);
-  const int filter_level = get_filter_level(lfi_n, mbmi);
+  const int filter_level = get_filter_level(lfi_n, mi);
   uint64_t *const left_y = &lfm->left_y[tx_size_y];
   uint64_t *const above_y = &lfm->above_y[tx_size_y];
   uint64_t *const int_4x4_y = &lfm->int_4x4_y;
@@ -754,7 +720,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
 
   // If the block has no coefficients and is not intra we skip applying
   // the loop filter on block edges.
-  if (mbmi->skip && is_inter_block(mbmi))
+  if (mi->skip && is_inter_block(mi))
     return;
 
   // Here we are adding a mask for the transform size. The transform
@@ -788,10 +754,9 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
 static void build_y_mask(const loop_filter_info_n *const lfi_n,
                          const MODE_INFO *mi, const int shift_y,
                          LOOP_FILTER_MASK *lfm) {
-  const MB_MODE_INFO *mbmi = &mi->mbmi;
-  const BLOCK_SIZE block_size = mbmi->sb_type;
-  const TX_SIZE tx_size_y = mbmi->tx_size;
-  const int filter_level = get_filter_level(lfi_n, mbmi);
+  const BLOCK_SIZE block_size = mi->sb_type;
+  const TX_SIZE tx_size_y = mi->tx_size;
+  const int filter_level = get_filter_level(lfi_n, mi);
   uint64_t *const left_y = &lfm->left_y[tx_size_y];
   uint64_t *const above_y = &lfm->above_y[tx_size_y];
   uint64_t *const int_4x4_y = &lfm->int_4x4_y;
@@ -812,7 +777,7 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
   *above_y |= above_prediction_mask[block_size] << shift_y;
   *left_y |= left_prediction_mask[block_size] << shift_y;
 
-  if (mbmi->skip && is_inter_block(mbmi))
+  if (mi->skip && is_inter_block(mi))
     return;
 
   *above_y |= (size_mask[block_size] &
@@ -941,7 +906,6 @@ void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row,
 
 // This function sets up the bit masks for the entire 64x64 region represented
 // by mi_row, mi_col.
-// TODO(JBB): This function only works for yv12.
 void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
                     MODE_INFO **mi, const int mode_info_stride,
                     LOOP_FILTER_MASK *lfm) {
@@ -977,10 +941,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
   vp9_zero(*lfm);
   assert(mip[0] != NULL);
 
-  // TODO(jimbankoski): Try moving most of the following code into decode
-  // loop and storing lfm in the mbmi structure so that we don't have to go
-  // through the recursive loop structure multiple times.
-  switch (mip[0]->mbmi.sb_type) {
+  switch (mip[0]->sb_type) {
     case BLOCK_64X64:
       build_masks(lfi_n, mip[0] , 0, 0, lfm);
       break;
@@ -1006,7 +967,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
         const int mi_32_row_offset = ((idx_32 >> 1) << 2);
         if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
           continue;
-        switch (mip[0]->mbmi.sb_type) {
+        switch (mip[0]->sb_type) {
           case BLOCK_32X32:
             build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
             break;
@@ -1036,7 +997,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
               if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
                 continue;
 
-              switch (mip[0]->mbmi.sb_type) {
+              switch (mip[0]->sb_type) {
                 case BLOCK_16X16:
                   build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                   break;
@@ -1083,8 +1044,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
       }
       break;
   }
-
-  vp9_adjust_mask(cm, mi_row, mi_col, lfm);
 }
 
 static void filter_selectively_vert(uint8_t *s, int pitch,
@@ -1092,25 +1051,25 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
                                     unsigned int mask_8x8,
                                     unsigned int mask_4x4,
                                     unsigned int mask_4x4_int,
-                                    const loop_filter_info_n *lfi_n,
+                                    const loop_filter_thresh *lfthr,
                                     const uint8_t *lfl) {
   unsigned int mask;
 
   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
        mask; mask >>= 1) {
-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+    const loop_filter_thresh *lfi = lfthr + *lfl;
 
     if (mask & 1) {
       if (mask_16x16 & 1) {
         vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
       } else if (mask_8x8 & 1) {
-        vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+        vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
       } else if (mask_4x4 & 1) {
-        vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+        vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
       }
     }
     if (mask_4x4_int & 1)
-      vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+      vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
     s += 8;
     lfl += 1;
     mask_16x16 >>= 1;
@@ -1126,13 +1085,13 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
                                            unsigned int mask_8x8,
                                            unsigned int mask_4x4,
                                            unsigned int mask_4x4_int,
-                                           const loop_filter_info_n *lfi_n,
+                                           const loop_filter_thresh *lfthr,
                                            const uint8_t *lfl, int bd) {
   unsigned int mask;
 
   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
        mask; mask >>= 1) {
-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+    const loop_filter_thresh *lfi = lfthr + *lfl;
 
     if (mask & 1) {
       if (mask_16x16 & 1) {
@@ -1140,15 +1099,15 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
                                    lfi->hev_thr, bd);
       } else if (mask_8x8 & 1) {
         vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
-                                  lfi->hev_thr, 1, bd);
+                                  lfi->hev_thr, bd);
       } else if (mask_4x4 & 1) {
         vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
-                                lfi->hev_thr, 1, bd);
+                                  lfi->hev_thr, bd);
       }
     }
     if (mask_4x4_int & 1)
       vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
-                                lfi->hev_thr, 1, bd);
+                                lfi->hev_thr, bd);
     s += 8;
     lfl += 1;
     mask_16x16 >>= 1;
@@ -1186,8 +1145,8 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
     // Determine the vertical edges that need filtering
     for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
       const MODE_INFO *mi = mi_8x8[c];
-      const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
-      const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
+      const BLOCK_SIZE sb_type = mi[0].sb_type;
+      const int skip_this = mi[0].skip && is_inter_block(mi);
       // left edge of current unit is block/partition edge -> no skip
       const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ?
           !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1;
@@ -1196,13 +1155,13 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
       const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ?
           !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1;
       const int skip_this_r = skip_this && !block_edge_above;
-      const TX_SIZE tx_size = get_uv_tx_size(&mi[0].mbmi, plane);
+      const TX_SIZE tx_size = get_uv_tx_size(mi, plane);
       const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
       const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
 
       // Filter level can vary per MI
       if (!(lfl[(r << 3) + (c >> ss_x)] =
-            get_filter_level(&cm->lf_info, &mi[0].mbmi)))
+            get_filter_level(&cm->lf_info, mi)))
         continue;
 
       // Build masks based on the transform size of each block
@@ -1263,23 +1222,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
                                      mask_8x8_c & border_mask,
                                      mask_4x4_c & border_mask,
                                      mask_4x4_int[r],
-                                     &cm->lf_info, &lfl[r << 3],
+                                     cm->lf_info.lfthr, &lfl[r << 3],
                                      (int)cm->bit_depth);
     } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       filter_selectively_vert(dst->buf, dst->stride,
                               mask_16x16_c & border_mask,
                               mask_8x8_c & border_mask,
                               mask_4x4_c & border_mask,
                               mask_4x4_int[r],
-                              &cm->lf_info, &lfl[r << 3]);
+                              cm->lf_info.lfthr, &lfl[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    filter_selectively_vert(dst->buf, dst->stride,
-                            mask_16x16_c & border_mask,
-                            mask_8x8_c & border_mask,
-                            mask_4x4_c & border_mask,
-                            mask_4x4_int[r],
-                            &cm->lf_info, &lfl[r << 3]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
     dst->buf += 8 * dst->stride;
     mi_8x8 += row_step_stride;
@@ -1312,23 +1266,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
                                       mask_8x8_r,
                                       mask_4x4_r,
                                       mask_4x4_int_r,
-                                      &cm->lf_info, &lfl[r << 3],
+                                      cm->lf_info.lfthr, &lfl[r << 3],
                                       (int)cm->bit_depth);
     } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       filter_selectively_horiz(dst->buf, dst->stride,
                                mask_16x16_r,
                                mask_8x8_r,
                                mask_4x4_r,
                                mask_4x4_int_r,
-                               &cm->lf_info, &lfl[r << 3]);
+                               cm->lf_info.lfthr, &lfl[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    filter_selectively_horiz(dst->buf, dst->stride,
-                             mask_16x16_r,
-                             mask_8x8_r,
-                             mask_4x4_r,
-                             mask_4x4_int_r,
-                             &cm->lf_info, &lfl[r << 3]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
     dst->buf += 8 * dst->stride;
   }
@@ -1350,27 +1299,29 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
 
   // Vertical pass: do 2 rows at one time
   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
-    unsigned int mask_16x16_l = mask_16x16 & 0xffff;
-    unsigned int mask_8x8_l = mask_8x8 & 0xffff;
-    unsigned int mask_4x4_l = mask_4x4 & 0xffff;
-    unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
-
-// Disable filtering on the leftmost column.
+    // Disable filtering on the leftmost column.
 #if CONFIG_VP9_HIGHBITDEPTH
     if (cm->use_highbitdepth) {
-      highbd_filter_selectively_vert_row2(
-          plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
-          mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
-          &lfm->lfl_y[r << 3], (int)cm->bit_depth);
+      highbd_filter_selectively_vert_row2(plane->subsampling_x,
+                                          CONVERT_TO_SHORTPTR(dst->buf),
+                                          dst->stride,
+                                          (unsigned int)mask_16x16,
+                                          (unsigned int)mask_8x8,
+                                          (unsigned int)mask_4x4,
+                                          (unsigned int)mask_4x4_int,
+                                          cm->lf_info.lfthr,
+                                          &lfm->lfl_y[r << 3],
+                                          (int)cm->bit_depth);
     } else {
-      filter_selectively_vert_row2(
-          plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
-          mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+      filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
+                                   (unsigned int)mask_16x16,
+                                   (unsigned int)mask_8x8,
+                                   (unsigned int)mask_4x4,
+                                   (unsigned int)mask_4x4_int,
+                                   cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    filter_selectively_vert_row2(
-        plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
-        mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
     dst->buf += 16 * dst->stride;
     mask_16x16 >>= 16;
@@ -1403,19 +1354,18 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
 
 #if CONFIG_VP9_HIGHBITDEPTH
     if (cm->use_highbitdepth) {
-      highbd_filter_selectively_horiz(
-          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
-          mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],
-          (int)cm->bit_depth);
+      highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
+                                      dst->stride, mask_16x16_r, mask_8x8_r,
+                                      mask_4x4_r, mask_4x4_int & 0xff,
+                                      cm->lf_info.lfthr, &lfm->lfl_y[r << 3],
+                                      (int)cm->bit_depth);
     } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
-                               mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
-                               &lfm->lfl_y[r << 3]);
+                               mask_4x4_r, mask_4x4_int & 0xff,
+                               cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
-                             mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
-                             &lfm->lfl_y[r << 3]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
     dst->buf += 8 * dst->stride;
@@ -1449,38 +1399,35 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
       lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
     }
 
-    {
-      unsigned int mask_16x16_l = mask_16x16 & 0xff;
-      unsigned int mask_8x8_l = mask_8x8 & 0xff;
-      unsigned int mask_4x4_l = mask_4x4 & 0xff;
-      unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
-
-// Disable filtering on the leftmost column.
+    // Disable filtering on the leftmost column.
 #if CONFIG_VP9_HIGHBITDEPTH
-      if (cm->use_highbitdepth) {
-        highbd_filter_selectively_vert_row2(
-            plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
-            mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
-            &lfl_uv[r << 1], (int)cm->bit_depth);
-      } else {
-        filter_selectively_vert_row2(
-            plane->subsampling_x, dst->buf, dst->stride,
-            mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
-            &lfl_uv[r << 1]);
-      }
-#else
-      filter_selectively_vert_row2(
-          plane->subsampling_x, dst->buf, dst->stride,
-          mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
-          &lfl_uv[r << 1]);
+    if (cm->use_highbitdepth) {
+      highbd_filter_selectively_vert_row2(plane->subsampling_x,
+                                          CONVERT_TO_SHORTPTR(dst->buf),
+                                          dst->stride,
+                                          (unsigned int)mask_16x16,
+                                          (unsigned int)mask_8x8,
+                                          (unsigned int)mask_4x4,
+                                          (unsigned int)mask_4x4_int,
+                                          cm->lf_info.lfthr, &lfl_uv[r << 1],
+                                          (int)cm->bit_depth);
+    } else {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-
-      dst->buf += 16 * dst->stride;
-      mask_16x16 >>= 8;
-      mask_8x8 >>= 8;
-      mask_4x4 >>= 8;
-      mask_4x4_int >>= 8;
+      filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
+                                   (unsigned int)mask_16x16,
+                                   (unsigned int)mask_8x8,
+                                   (unsigned int)mask_4x4,
+                                   (unsigned int)mask_4x4_int,
+                                   cm->lf_info.lfthr, &lfl_uv[r << 1]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+    dst->buf += 16 * dst->stride;
+    mask_16x16 >>= 8;
+    mask_8x8 >>= 8;
+    mask_4x4 >>= 8;
+    mask_4x4_int >>= 8;
   }
 
   // Horizontal pass
@@ -1512,17 +1459,16 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
     if (cm->use_highbitdepth) {
       highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
                                       dst->stride, mask_16x16_r, mask_8x8_r,
-                                      mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
-                                      &lfl_uv[r << 1], (int)cm->bit_depth);
+                                      mask_4x4_r, mask_4x4_int_r,
+                                      cm->lf_info.lfthr, &lfl_uv[r << 1],
+                                      (int)cm->bit_depth);
     } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
-                               mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+                               mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,
                                &lfl_uv[r << 1]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
-                             mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
-                             &lfl_uv[r << 1]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
     dst->buf += 8 * dst->stride;
@@ -1558,7 +1504,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm,
 
       vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
 
-      // TODO(JBB): Make setup_mask work for non 420.
+      // TODO(jimbankoski): For 444 only need to do y mask.
       vp9_adjust_mask(cm, mi_row, mi_col, lfm);
 
       vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm);
@@ -1598,6 +1544,8 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
 }
 
 // Used by the encoder to build the loopfilter masks.
+// TODO(slavarnway): Do the encoder the same way the decoder does it and
+//                   build the masks in line as part of the encode process.
 void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level,
                           int partial_frame) {
   int start_mi_row, end_mi_row, mi_rows_to_filter;
@@ -1640,12 +1588,12 @@ static const uint8_t first_block_in_16x16[8][8] = {
 // This function sets up the bit masks for a block represented
 // by mi_row, mi_col in a 64x64 region.
 // TODO(SJL): This function only works for yv12.
-void vp9_build_mask(VP9_COMMON *cm, const MB_MODE_INFO *mbmi, int mi_row,
+void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row,
                     int mi_col, int bw, int bh) {
-  const BLOCK_SIZE block_size = mbmi->sb_type;
-  const TX_SIZE tx_size_y = mbmi->tx_size;
+  const BLOCK_SIZE block_size = mi->sb_type;
+  const TX_SIZE tx_size_y = mi->tx_size;
   const loop_filter_info_n *const lfi_n = &cm->lf_info;
-  const int filter_level = get_filter_level(lfi_n, mbmi);
+  const int filter_level = get_filter_level(lfi_n, mi);
   const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1);
   LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col);
   uint64_t *const left_y = &lfm->left_y[tx_size_y];
@@ -1693,7 +1641,7 @@ void vp9_build_mask(VP9_COMMON *cm, const MB_MODE_INFO *mbmi, int mi_row,
 
   // If the block has no coefficients and is not intra we skip applying
   // the loop filter on block edges.
-  if (mbmi->skip && is_inter_block(mbmi))
+  if (mi->skip && is_inter_block(mi))
     return;
 
   // Add a mask for the transform size. The transform size mask is set to
diff --git a/libvpx/vp9/common/vp9_loopfilter.h b/libvpx/vp9/common/vp9_loopfilter.h
index 7f943ea09..fca8830fa 100644
--- a/libvpx/vp9/common/vp9_loopfilter.h
+++ b/libvpx/vp9/common/vp9_loopfilter.h
@@ -69,6 +69,7 @@ typedef struct {
 
 struct loopfilter {
   int filter_level;
+  int last_filt_level;
 
   int sharpness_level;
   int last_sharpness_level;
@@ -134,7 +135,7 @@ static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf,
   return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)];
 }
 
-void vp9_build_mask(struct VP9Common *cm, const MB_MODE_INFO *mbmi, int mi_row,
+void vp9_build_mask(struct VP9Common *cm, const MODE_INFO *mi, int mi_row,
                     int mi_col, int bw, int bh);
 void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row,
                      const int mi_col, LOOP_FILTER_MASK *lfm);
diff --git a/libvpx/vp9/common/vp9_mfqe.c b/libvpx/vp9/common/vp9_mfqe.c
index 6d560f438..f5264665b 100644
--- a/libvpx/vp9/common/vp9_mfqe.c
+++ b/libvpx/vp9/common/vp9_mfqe.c
@@ -203,12 +203,12 @@ static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
 static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
   // Check the motion in current block(for inter frame),
   // or check the motion in the correlated block in last frame (for keyframe).
-  const int mv_len_square = mi->mbmi.mv[0].as_mv.row *
-                            mi->mbmi.mv[0].as_mv.row +
-                            mi->mbmi.mv[0].as_mv.col *
-                            mi->mbmi.mv[0].as_mv.col;
+  const int mv_len_square = mi->mv[0].as_mv.row *
+                            mi->mv[0].as_mv.row +
+                            mi->mv[0].as_mv.col *
+                            mi->mv[0].as_mv.col;
   const int mv_threshold = 100;
-  return mi->mbmi.mode >= NEARESTMV &&  // Not an intra block
+  return mi->mode >= NEARESTMV &&  // Not an intra block
          cur_bs >= BLOCK_16X16 &&
          mv_len_square <= mv_threshold;
 }
@@ -220,7 +220,7 @@ static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
                            uint8_t *yd, uint8_t *ud, uint8_t *vd,
                            int yd_stride, int uvd_stride) {
   int mi_offset, y_offset, uv_offset;
-  const BLOCK_SIZE cur_bs = mi->mbmi.sb_type;
+  const BLOCK_SIZE cur_bs = mi->sb_type;
   const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
   const int bsl = b_width_log2_lookup[bs];
   PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
diff --git a/libvpx/vp9/common/vp9_mvref_common.c b/libvpx/vp9/common/vp9_mvref_common.c
index 77d1ff459..0eb01a51b 100644
--- a/libvpx/vp9/common/vp9_mvref_common.c
+++ b/libvpx/vp9/common/vp9_mvref_common.c
@@ -11,20 +11,19 @@
 
 #include "vp9/common/vp9_mvref_common.h"
 
-// This function searches the neighbourhood of a given MB/SB
+// This function searches the neighborhood of a given MB/SB
 // to try and find candidate reference vectors.
 static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                              MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                              int_mv *mv_ref_list,
                              int block, int mi_row, int mi_col,
-                             find_mv_refs_sync sync, void *const data,
                              uint8_t *mode_context) {
   const int *ref_sign_bias = cm->ref_frame_sign_bias;
   int i, refmv_count = 0;
-  const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
+  const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type];
   int different_ref_found = 0;
   int context_counter = 0;
-  const MV_REF *const  prev_frame_mvs = cm->use_prev_frame_mvs ?
+  const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
       cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
   const TileInfo *const tile = &xd->tile;
 
@@ -39,15 +38,14 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
     if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
       const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
                                                    xd->mi_stride];
-      const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
       // Keep counts for entropy encoding.
-      context_counter += mode_2_counter[candidate->mode];
+      context_counter += mode_2_counter[candidate_mi->mode];
       different_ref_found = 1;
 
-      if (candidate->ref_frame[0] == ref_frame)
+      if (candidate_mi->ref_frame[0] == ref_frame)
         ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
                         refmv_count, mv_ref_list, Done);
-      else if (candidate->ref_frame[1] == ref_frame)
+      else if (candidate_mi->ref_frame[1] == ref_frame)
         ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
                         refmv_count, mv_ref_list, Done);
     }
@@ -59,34 +57,19 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
   for (; i < MVREF_NEIGHBOURS; ++i) {
     const POSITION *const mv_ref = &mv_ref_search[i];
     if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
-      const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
-                                                    xd->mi_stride]->mbmi;
+      const MODE_INFO *const candidate_mi =
+          xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
       different_ref_found = 1;
 
-      if (candidate->ref_frame[0] == ref_frame)
-        ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done);
-      else if (candidate->ref_frame[1] == ref_frame)
-        ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, Done);
+      if (candidate_mi->ref_frame[0] == ref_frame)
+        ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done);
+      else if (candidate_mi->ref_frame[1] == ref_frame)
+        ADD_MV_REF_LIST(candidate_mi->mv[1], refmv_count, mv_ref_list, Done);
     }
   }
 
-  // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
-  // on windows platform. The sync here is unncessary if use_perv_frame_mvs
-  // is 0. But after removing it, there will be hang in the unit test on windows
-  // due to several threads waiting for a thread's signal.
-#if defined(_WIN32) && !HAVE_PTHREAD_H
-    if (cm->frame_parallel_decode && sync != NULL) {
-      sync(data, mi_row);
-    }
-#endif
-
   // Check the last frame's mode and mv info.
   if (cm->use_prev_frame_mvs) {
-    // Synchronize here for frame parallel decode if sync function is provided.
-    if (cm->frame_parallel_decode && sync != NULL) {
-      sync(data, mi_row);
-    }
-
     if (prev_frame_mvs->ref_frame[0] == ref_frame) {
       ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
     } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
@@ -101,11 +84,11 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
     for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
       const POSITION *mv_ref = &mv_ref_search[i];
       if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
-        const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
-                                              * xd->mi_stride]->mbmi;
+        const MODE_INFO *const candidate_mi =
+            xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
 
         // If the candidate is INTRA we don't want to consider its mv.
-        IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
+        IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias,
                                  refmv_count, mv_ref_list, Done);
       }
     }
@@ -150,20 +133,9 @@ void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                       MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                       int_mv *mv_ref_list,
                       int mi_row, int mi_col,
-                      find_mv_refs_sync sync, void *const data,
                       uint8_t *mode_context) {
   find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1,
-                   mi_row, mi_col, sync, data, mode_context);
-}
-
-static void lower_mv_precision(MV *mv, int allow_hp) {
-  const int use_hp = allow_hp && vp9_use_mv_hp(mv);
-  if (!use_hp) {
-    if (mv->row & 1)
-      mv->row += (mv->row > 0 ? -1 : 1);
-    if (mv->col & 1)
-      mv->col += (mv->col > 0 ? -1 : 1);
-  }
+                   mi_row, mi_col, mode_context);
 }
 
 void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
@@ -190,8 +162,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
 
   assert(MAX_MV_REF_CANDIDATES == 2);
 
-  find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block,
-                   mi_row, mi_col, NULL, NULL, mode_context);
+  find_mv_refs_idx(cm, xd, mi, mi->ref_frame[ref], mv_list, block,
+                   mi_row, mi_col, mode_context);
 
   near_mv->as_int = 0;
   switch (block) {
diff --git a/libvpx/vp9/common/vp9_mvref_common.h b/libvpx/vp9/common/vp9_mvref_common.h
index bd216d433..4380843e2 100644
--- a/libvpx/vp9/common/vp9_mvref_common.h
+++ b/libvpx/vp9/common/vp9_mvref_common.h
@@ -136,19 +136,19 @@ static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
 // on whether the block_size < 8x8 and we have check_sub_blocks set.
 static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
                                       int search_col, int block_idx) {
-  return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
+  return block_idx >= 0 && candidate->sb_type < BLOCK_8X8
           ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
               .as_mv[which_mv]
-          : candidate->mbmi.mv[which_mv];
+          : candidate->mv[which_mv];
 }
 
 
 // Performs mv sign inversion if indicated by the reference frame combination.
-static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
+static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref,
                               const MV_REFERENCE_FRAME this_ref_frame,
                               const int *ref_sign_bias) {
-  int_mv mv = mbmi->mv[ref];
-  if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
+  int_mv mv = mi->mv[ref];
+  if (ref_sign_bias[mi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
     mv.as_mv.row *= -1;
     mv.as_mv.col *= -1;
   }
@@ -157,7 +157,7 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
 
 // This macro is used to add a motion vector mv_ref list if it isn't
 // already in the list.  If it's the second motion vector it will also
-// skip all additional processing and jump to done!
+// skip all additional processing and jump to Done!
 #define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \
   do { \
     if (refmv_count) { \
@@ -207,11 +207,20 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
                xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
 }
 
+static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
+  const int use_hp = allow_hp && use_mv_hp(mv);
+  if (!use_hp) {
+    if (mv->row & 1)
+      mv->row += (mv->row > 0 ? -1 : 1);
+    if (mv->col & 1)
+      mv->col += (mv->col > 0 ? -1 : 1);
+  }
+}
+
 typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
 void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                       MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                       int_mv *mv_ref_list, int mi_row, int mi_col,
-                      find_mv_refs_sync sync, void *const data,
                       uint8_t *mode_context);
 
 // check a list of motion vectors by sad score using a number rows of pixels
diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h
index ceffdedf9..3fd935e62 100644
--- a/libvpx/vp9/common/vp9_onyxc_int.h
+++ b/libvpx/vp9/common/vp9_onyxc_int.h
@@ -404,25 +404,8 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
   xd->mb_to_right_edge  = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
 
   // Are edges available for intra prediction?
-  xd->up_available    = (mi_row != 0);
-  xd->left_available  = (mi_col > tile->mi_col_start);
-  if (xd->up_available) {
-    xd->above_mi = xd->mi[-xd->mi_stride];
-    // above_mi may be NULL in VP9 encoder's first pass.
-    xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL;
-  } else {
-    xd->above_mi = NULL;
-    xd->above_mbmi = NULL;
-  }
-
-  if (xd->left_available) {
-    xd->left_mi = xd->mi[-1];
-    // left_mi may be NULL in VP9 encoder's first pass.
-    xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL;
-  } else {
-    xd->left_mi = NULL;
-    xd->left_mbmi = NULL;
-  }
+  xd->above_mi = (mi_row != 0) ? xd->mi[-xd->mi_stride] : NULL;
+  xd->left_mi  = (mi_col > tile->mi_col_start) ? xd->mi[-1] : NULL;
 }
 
 static INLINE void update_partition_context(MACROBLOCKD *xd,
diff --git a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c
index b685d813b..c04cc8f05 100644
--- a/libvpx/vp9/common/vp9_postproc.c
+++ b/libvpx/vp9/common/vp9_postproc.c
@@ -12,6 +12,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 
+#include "./vpx_dsp_rtcd.h"
 #include "./vpx_config.h"
 #include "./vpx_scale_rtcd.h"
 #include "./vp9_rtcd.h"
@@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
   state->last_noise = a;
 }
 
-void vp9_plane_add_noise_c(uint8_t *start, char *noise,
-                           char blackclamp[16],
-                           char whiteclamp[16],
-                           char bothclamp[16],
-                           unsigned int width, unsigned int height, int pitch) {
-  unsigned int i, j;
-
-  // TODO(jbb): why does simd code use both but c doesn't,  normalize and
-  // fix..
-  (void) bothclamp;
-  for (i = 0; i < height; i++) {
-    uint8_t *pos = start + i * pitch;
-    char  *ref = (char *)(noise + (rand() & 0xff));  // NOLINT
-
-    for (j = 0; j < width; j++) {
-      if (pos[j] < blackclamp[0])
-        pos[j] = blackclamp[0];
-
-      if (pos[j] > 255 + whiteclamp[0])
-        pos[j] = 255 + whiteclamp[0];
-
-      pos[j] += ref[j];
-    }
-  }
-}
-
 static void swap_mi_and_prev_mi(VP9_COMMON *cm) {
   // Current mip will be the prev_mip for the next frame.
   MODE_INFO *temp = cm->postproc_state.prev_mip;
@@ -726,8 +701,7 @@ int vp9_post_proc_frame(struct VP9Common *cm,
         ppstate->last_noise != noise_level) {
       fillrd(ppstate, 63 - q, noise_level);
     }
-
-    vp9_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
+    vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
                         ppstate->whiteclamp, ppstate->bothclamp,
                         ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
   }
diff --git a/libvpx/vp9/common/vp9_pred_common.c b/libvpx/vp9/common/vp9_pred_common.c
index 1f1632573..8f90e70e7 100644
--- a/libvpx/vp9/common/vp9_pred_common.c
+++ b/libvpx/vp9/common/vp9_pred_common.c
@@ -17,82 +17,57 @@
 int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
   // Note:
   // The mode info data structure has a one element border above and to the
-  // left of the entries correpsonding to real macroblocks.
-  // The prediction flags in these dummy entries are initialised to 0.
-  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
-  const int left_type = xd->left_available && is_inter_block(left_mbmi) ?
-                            left_mbmi->interp_filter : SWITCHABLE_FILTERS;
-  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
-  const int above_type = xd->up_available && is_inter_block(above_mbmi) ?
-                             above_mbmi->interp_filter : SWITCHABLE_FILTERS;
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+  const MODE_INFO *const left_mi = xd->left_mi;
+  const int left_type = left_mi && is_inter_block(left_mi) ?
+                            left_mi->interp_filter : SWITCHABLE_FILTERS;
+  const MODE_INFO *const above_mi = xd->above_mi;
+  const int above_type = above_mi && is_inter_block(above_mi) ?
+                             above_mi->interp_filter : SWITCHABLE_FILTERS;
 
   if (left_type == above_type)
     return left_type;
-  else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS)
+  else if (left_type == SWITCHABLE_FILTERS)
     return above_type;
-  else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS)
+  else if (above_type == SWITCHABLE_FILTERS)
     return left_type;
   else
     return SWITCHABLE_FILTERS;
 }
 
-// The mode info data structure has a one element border above and to the
-// left of the entries corresponding to real macroblocks.
-// The prediction flags in these dummy entries are initialized to 0.
-// 0 - inter/inter, inter/--, --/inter, --/--
-// 1 - intra/inter, inter/intra
-// 2 - intra/--, --/intra
-// 3 - intra/intra
-int vp9_get_intra_inter_context(const MACROBLOCKD *xd) {
-  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
-  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
-  const int has_above = xd->up_available;
-  const int has_left = xd->left_available;
-
-  if (has_above && has_left) {  // both edges available
-    const int above_intra = !is_inter_block(above_mbmi);
-    const int left_intra = !is_inter_block(left_mbmi);
-    return left_intra && above_intra ? 3
-                                     : left_intra || above_intra;
-  } else if (has_above || has_left) {  // one edge available
-    return 2 * !is_inter_block(has_above ? above_mbmi : left_mbmi);
-  } else {
-    return 0;
-  }
-}
-
 int vp9_get_reference_mode_context(const VP9_COMMON *cm,
                                    const MACROBLOCKD *xd) {
   int ctx;
-  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
-  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
-  const int has_above = xd->up_available;
-  const int has_left = xd->left_available;
+  const MODE_INFO *const above_mi = xd->above_mi;
+  const MODE_INFO *const left_mi = xd->left_mi;
+  const int has_above = !!above_mi;
+  const int has_left = !!left_mi;
   // Note:
   // The mode info data structure has a one element border above and to the
-  // left of the entries correpsonding to real macroblocks.
-  // The prediction flags in these dummy entries are initialised to 0.
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
   if (has_above && has_left) {  // both edges available
-    if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
+    if (!has_second_ref(above_mi) && !has_second_ref(left_mi))
       // neither edge uses comp pred (0/1)
-      ctx = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^
-            (left_mbmi->ref_frame[0] == cm->comp_fixed_ref);
-    else if (!has_second_ref(above_mbmi))
+      ctx = (above_mi->ref_frame[0] == cm->comp_fixed_ref) ^
+            (left_mi->ref_frame[0] == cm->comp_fixed_ref);
+    else if (!has_second_ref(above_mi))
       // one of two edges uses comp pred (2/3)
-      ctx = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
-                 !is_inter_block(above_mbmi));
-    else if (!has_second_ref(left_mbmi))
+      ctx = 2 + (above_mi->ref_frame[0] == cm->comp_fixed_ref ||
+                 !is_inter_block(above_mi));
+    else if (!has_second_ref(left_mi))
       // one of two edges uses comp pred (2/3)
-      ctx = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
-                 !is_inter_block(left_mbmi));
+      ctx = 2 + (left_mi->ref_frame[0] == cm->comp_fixed_ref ||
+                 !is_inter_block(left_mi));
     else  // both edges use comp pred (4)
       ctx = 4;
   } else if (has_above || has_left) {  // one edge available
-    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+    const MODE_INFO *edge_mi = has_above ? above_mi : left_mi;
 
-    if (!has_second_ref(edge_mbmi))
+    if (!has_second_ref(edge_mi))
       // edge does not use comp pred (0/1)
-      ctx = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref;
+      ctx = edge_mi->ref_frame[0] == cm->comp_fixed_ref;
     else
       // edge uses comp pred (3)
       ctx = 3;
@@ -107,39 +82,39 @@ int vp9_get_reference_mode_context(const VP9_COMMON *cm,
 int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
                                     const MACROBLOCKD *xd) {
   int pred_context;
-  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
-  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
-  const int above_in_image = xd->up_available;
-  const int left_in_image = xd->left_available;
+  const MODE_INFO *const above_mi = xd->above_mi;
+  const MODE_INFO *const left_mi = xd->left_mi;
+  const int above_in_image = !!above_mi;
+  const int left_in_image = !!left_mi;
 
   // Note:
   // The mode info data structure has a one element border above and to the
-  // left of the entries correpsonding to real macroblocks.
-  // The prediction flags in these dummy entries are initialised to 0.
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
   const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
   const int var_ref_idx = !fix_ref_idx;
 
   if (above_in_image && left_in_image) {  // both edges available
-    const int above_intra = !is_inter_block(above_mbmi);
-    const int left_intra = !is_inter_block(left_mbmi);
+    const int above_intra = !is_inter_block(above_mi);
+    const int left_intra = !is_inter_block(left_mi);
 
     if (above_intra && left_intra) {  // intra/intra (2)
       pred_context = 2;
     } else if (above_intra || left_intra) {  // intra/inter
-      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+      const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi;
 
-      if (!has_second_ref(edge_mbmi))  // single pred (1/3)
-        pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
+      if (!has_second_ref(edge_mi))  // single pred (1/3)
+        pred_context = 1 + 2 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]);
       else  // comp pred (1/3)
-        pred_context = 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx]
+        pred_context = 1 + 2 * (edge_mi->ref_frame[var_ref_idx]
                                     != cm->comp_var_ref[1]);
     } else {  // inter/inter
-      const int l_sg = !has_second_ref(left_mbmi);
-      const int a_sg = !has_second_ref(above_mbmi);
-      const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0]
-                                           : above_mbmi->ref_frame[var_ref_idx];
-      const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0]
-                                           : left_mbmi->ref_frame[var_ref_idx];
+      const int l_sg = !has_second_ref(left_mi);
+      const int a_sg = !has_second_ref(above_mi);
+      const MV_REFERENCE_FRAME vrfa = a_sg ? above_mi->ref_frame[0]
+                                           : above_mi->ref_frame[var_ref_idx];
+      const MV_REFERENCE_FRAME vrfl = l_sg ? left_mi->ref_frame[0]
+                                           : left_mi->ref_frame[var_ref_idx];
 
       if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) {
         pred_context = 0;
@@ -167,16 +142,16 @@ int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
       }
     }
   } else if (above_in_image || left_in_image) {  // one edge available
-    const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+    const MODE_INFO *edge_mi = above_in_image ? above_mi : left_mi;
 
-    if (!is_inter_block(edge_mbmi)) {
+    if (!is_inter_block(edge_mi)) {
       pred_context = 2;
     } else {
-      if (has_second_ref(edge_mbmi))
-        pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx]
+      if (has_second_ref(edge_mi))
+        pred_context = 4 * (edge_mi->ref_frame[var_ref_idx]
                               != cm->comp_var_ref[1]);
       else
-        pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
+        pred_context = 3 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]);
     }
   } else {  // no edges available (2)
     pred_context = 2;
@@ -188,34 +163,34 @@ int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
 
 int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
   int pred_context;
-  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
-  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
-  const int has_above = xd->up_available;
-  const int has_left = xd->left_available;
+  const MODE_INFO *const above_mi = xd->above_mi;
+  const MODE_INFO *const left_mi = xd->left_mi;
+  const int has_above = !!above_mi;
+  const int has_left = !!left_mi;
   // Note:
   // The mode info data structure has a one element border above and to the
-  // left of the entries correpsonding to real macroblocks.
-  // The prediction flags in these dummy entries are initialised to 0.
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
   if (has_above && has_left) {  // both edges available
-    const int above_intra = !is_inter_block(above_mbmi);
-    const int left_intra = !is_inter_block(left_mbmi);
+    const int above_intra = !is_inter_block(above_mi);
+    const int left_intra = !is_inter_block(left_mi);
 
     if (above_intra && left_intra) {  // intra/intra
       pred_context = 2;
     } else if (above_intra || left_intra) {  // intra/inter or inter/intra
-      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
-      if (!has_second_ref(edge_mbmi))
-        pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+      const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi;
+      if (!has_second_ref(edge_mi))
+        pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME);
       else
-        pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
-                            edge_mbmi->ref_frame[1] == LAST_FRAME);
+        pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME ||
+                            edge_mi->ref_frame[1] == LAST_FRAME);
     } else {  // inter/inter
-      const int above_has_second = has_second_ref(above_mbmi);
-      const int left_has_second = has_second_ref(left_mbmi);
-      const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
-      const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
-      const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
-      const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+      const int above_has_second = has_second_ref(above_mi);
+      const int left_has_second = has_second_ref(left_mi);
+      const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0];
+      const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1];
+      const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0];
+      const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1];
 
       if (above_has_second && left_has_second) {
         pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME ||
@@ -234,15 +209,15 @@ int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
       }
     }
   } else if (has_above || has_left) {  // one edge available
-    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
-    if (!is_inter_block(edge_mbmi)) {  // intra
+    const MODE_INFO *edge_mi = has_above ? above_mi : left_mi;
+    if (!is_inter_block(edge_mi)) {  // intra
       pred_context = 2;
     } else {  // inter
-      if (!has_second_ref(edge_mbmi))
-        pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+      if (!has_second_ref(edge_mi))
+        pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME);
       else
-        pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
-                            edge_mbmi->ref_frame[1] == LAST_FRAME);
+        pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME ||
+                            edge_mi->ref_frame[1] == LAST_FRAME);
     }
   } else {  // no edges available
     pred_context = 2;
@@ -254,39 +229,39 @@ int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
 
 int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
   int pred_context;
-  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
-  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
-  const int has_above = xd->up_available;
-  const int has_left = xd->left_available;
+  const MODE_INFO *const above_mi = xd->above_mi;
+  const MODE_INFO *const left_mi = xd->left_mi;
+  const int has_above = !!above_mi;
+  const int has_left = !!left_mi;
 
   // Note:
   // The mode info data structure has a one element border above and to the
-  // left of the entries correpsonding to real macroblocks.
-  // The prediction flags in these dummy entries are initialised to 0.
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
   if (has_above && has_left) {  // both edges available
-    const int above_intra = !is_inter_block(above_mbmi);
-    const int left_intra = !is_inter_block(left_mbmi);
+    const int above_intra = !is_inter_block(above_mi);
+    const int left_intra = !is_inter_block(left_mi);
 
     if (above_intra && left_intra) {  // intra/intra
       pred_context = 2;
     } else if (above_intra || left_intra) {  // intra/inter or inter/intra
-      const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
-      if (!has_second_ref(edge_mbmi)) {
-        if (edge_mbmi->ref_frame[0] == LAST_FRAME)
+      const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi;
+      if (!has_second_ref(edge_mi)) {
+        if (edge_mi->ref_frame[0] == LAST_FRAME)
           pred_context = 3;
         else
-          pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+          pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
       } else {
-        pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
-                                edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
+        pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
+                                edge_mi->ref_frame[1] == GOLDEN_FRAME);
       }
     } else {  // inter/inter
-      const int above_has_second = has_second_ref(above_mbmi);
-      const int left_has_second = has_second_ref(left_mbmi);
-      const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
-      const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
-      const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
-      const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+      const int above_has_second = has_second_ref(above_mi);
+      const int left_has_second = has_second_ref(left_mi);
+      const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0];
+      const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1];
+      const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0];
+      const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1];
 
       if (above_has_second && left_has_second) {
         if (above0 == left0 && above1 == left1)
@@ -321,16 +296,16 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
       }
     }
   } else if (has_above || has_left) {  // one edge available
-    const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+    const MODE_INFO *edge_mi = has_above ? above_mi : left_mi;
 
-    if (!is_inter_block(edge_mbmi) ||
-        (edge_mbmi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mbmi)))
+    if (!is_inter_block(edge_mi) ||
+        (edge_mi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mi)))
       pred_context = 2;
-    else if (!has_second_ref(edge_mbmi))
-      pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+    else if (!has_second_ref(edge_mi))
+      pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
     else
-      pred_context = 3 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
-                          edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
+      pred_context = 3 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
+                          edge_mi->ref_frame[1] == GOLDEN_FRAME);
   } else {  // no edges available (2)
     pred_context = 2;
   }
diff --git a/libvpx/vp9/common/vp9_pred_common.h b/libvpx/vp9/common/vp9_pred_common.h
index 6f7af4a50..f3c676e95 100644
--- a/libvpx/vp9/common/vp9_pred_common.h
+++ b/libvpx/vp9/common/vp9_pred_common.h
@@ -42,8 +42,8 @@ static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) {
   const MODE_INFO *const above_mi = xd->above_mi;
   const MODE_INFO *const left_mi = xd->left_mi;
   const int above_sip = (above_mi != NULL) ?
-                        above_mi->mbmi.seg_id_predicted : 0;
-  const int left_sip = (left_mi != NULL) ? left_mi->mbmi.seg_id_predicted : 0;
+                        above_mi->seg_id_predicted : 0;
+  const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0;
 
   return above_sip + left_sip;
 }
@@ -56,8 +56,8 @@ static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg,
 static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) {
   const MODE_INFO *const above_mi = xd->above_mi;
   const MODE_INFO *const left_mi = xd->left_mi;
-  const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip : 0;
-  const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip : 0;
+  const int above_skip = (above_mi != NULL) ? above_mi->skip : 0;
+  const int left_skip = (left_mi != NULL) ? left_mi->skip : 0;
   return above_skip + left_skip;
 }
 
@@ -68,11 +68,32 @@ static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm,
 
 int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
 
-int vp9_get_intra_inter_context(const MACROBLOCKD *xd);
+// The mode info data structure has a one element border above and to the
+// left of the entries corresponding to real macroblocks.
+// The prediction flags in these dummy entries are initialized to 0.
+// 0 - inter/inter, inter/--, --/inter, --/--
+// 1 - intra/inter, inter/intra
+// 2 - intra/--, --/intra
+// 3 - intra/intra
+static INLINE int get_intra_inter_context(const MACROBLOCKD *xd) {
+  const MODE_INFO *const above_mi = xd->above_mi;
+  const MODE_INFO *const left_mi = xd->left_mi;
+  const int has_above = !!above_mi;
+  const int has_left = !!left_mi;
+
+  if (has_above && has_left) {  // both edges available
+    const int above_intra = !is_inter_block(above_mi);
+    const int left_intra = !is_inter_block(left_mi);
+    return left_intra && above_intra ? 3 : left_intra || above_intra;
+  } else if (has_above || has_left) {  // one edge available
+    return 2 * !is_inter_block(has_above ? above_mi : left_mi);
+  }
+  return 0;
+}
 
 static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm,
                                                 const MACROBLOCKD *xd) {
-  return cm->fc->intra_inter_prob[vp9_get_intra_inter_context(xd)];
+  return cm->fc->intra_inter_prob[get_intra_inter_context(xd)];
 }
 
 int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd);
@@ -110,15 +131,15 @@ static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm,
 // left of the entries corresponding to real blocks.
 // The prediction flags in these dummy entries are initialized to 0.
 static INLINE int get_tx_size_context(const MACROBLOCKD *xd) {
-  const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type];
-  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
-  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
-  const int has_above = xd->up_available;
-  const int has_left = xd->left_available;
-  int above_ctx = (has_above && !above_mbmi->skip) ? (int)above_mbmi->tx_size
-                                                   : max_tx_size;
-  int left_ctx = (has_left && !left_mbmi->skip) ? (int)left_mbmi->tx_size
-                                                : max_tx_size;
+  const int max_tx_size = max_txsize_lookup[xd->mi[0]->sb_type];
+  const MODE_INFO *const above_mi = xd->above_mi;
+  const MODE_INFO *const left_mi = xd->left_mi;
+  const int has_above = !!above_mi;
+  const int has_left = !!left_mi;
+  int above_ctx = (has_above && !above_mi->skip) ? (int)above_mi->tx_size
+                                                 : max_tx_size;
+  int left_ctx = (has_left && !left_mi->skip) ? (int)left_mi->tx_size
+                                              : max_tx_size;
   if (!has_left)
     left_ctx = above_ctx;
 
diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c
index d8c14ecc8..84718e970 100644
--- a/libvpx/vp9/common/vp9_reconinter.c
+++ b/libvpx/vp9/common/vp9_reconinter.c
@@ -20,19 +20,6 @@
 #include "vp9/common/vp9_reconintra.h"
 
 #if CONFIG_VP9_HIGHBITDEPTH
-void high_inter_predictor(const uint8_t *src, int src_stride,
-                                 uint8_t *dst, int dst_stride,
-                                 const int subpel_x,
-                                 const int subpel_y,
-                                 const struct scale_factors *sf,
-                                 int w, int h, int ref,
-                                 const InterpKernel *kernel,
-                                 int xs, int ys, int bd) {
-  sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
-      src, src_stride, dst, dst_stride,
-      kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
-}
-
 void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
                                       uint8_t *dst, int dst_stride,
                                       const MV *src_mv,
@@ -50,8 +37,9 @@ void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
 
   src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
 
-  high_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
-                       sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, bd);
+  highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+                         sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4,
+                         bd);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
@@ -159,8 +147,8 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
                                    int mi_x, int mi_y) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const MODE_INFO *mi = xd->mi[0];
-  const int is_compound = has_second_ref(&mi->mbmi);
-  const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
+  const int is_compound = has_second_ref(mi);
+  const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
   int ref;
 
   for (ref = 0; ref < 1 + is_compound; ++ref) {
@@ -168,9 +156,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
     struct buf_2d *const pre_buf = &pd->pre[ref];
     struct buf_2d *const dst_buf = &pd->dst;
     uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
-    const MV mv = mi->mbmi.sb_type < BLOCK_8X8
+    const MV mv = mi->sb_type < BLOCK_8X8
                ? average_split_mvs(pd, mi, ref, block)
-               : mi->mbmi.mv[ref].as_mv;
+               : mi->mv[ref].as_mv;
 
     // TODO(jkoleszar): This clamping is done in the incorrect place for the
     // scaling case. It needs to be done on the scaled MV, not the pre-scaling
@@ -190,6 +178,12 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
       // Co-ordinate of containing block to pixel precision.
       const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
       const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
+#if CONFIG_BETTER_HW_COMPATIBILITY
+      assert(xd->mi[0]->sb_type != BLOCK_4X8 &&
+             xd->mi[0]->sb_type != BLOCK_8X4);
+      assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) &&
+             mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x)));
+#endif
       if (plane == 0)
         pre_buf->buf = xd->block_refs[ref]->buf->y_buffer;
       else if (plane == 1)
@@ -216,9 +210,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
 
 #if CONFIG_VP9_HIGHBITDEPTH
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
-                           subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
-                           xd->bd);
+      highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
+                             subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
+                             xd->bd);
     } else {
       inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
                       subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
@@ -244,7 +238,7 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
     const int bw = 4 * num_4x4_w;
     const int bh = 4 * num_4x4_h;
 
-    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+    if (xd->mi[0]->sb_type < BLOCK_8X8) {
       int i = 0, x, y;
       assert(bsize == BLOCK_8X8);
       for (y = 0; y < num_4x4_h; ++y)
diff --git a/libvpx/vp9/common/vp9_reconinter.h b/libvpx/vp9/common/vp9_reconinter.h
index 7d907748e..07745e3aa 100644
--- a/libvpx/vp9/common/vp9_reconinter.h
+++ b/libvpx/vp9/common/vp9_reconinter.h
@@ -34,14 +34,18 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-void high_inter_predictor(const uint8_t *src, int src_stride,
-                                 uint8_t *dst, int dst_stride,
-                                 const int subpel_x,
-                                 const int subpel_y,
-                                 const struct scale_factors *sf,
-                                 int w, int h, int ref,
-                                 const InterpKernel *kernel,
-                                 int xs, int ys, int bd);
+static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
+                                          uint8_t *dst, int dst_stride,
+                                          const int subpel_x,
+                                          const int subpel_y,
+                                          const struct scale_factors *sf,
+                                          int w, int h, int ref,
+                                          const InterpKernel *kernel,
+                                          int xs, int ys, int bd) {
+  sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+      src, src_stride, dst, dst_stride,
+      kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+}
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi,
diff --git a/libvpx/vp9/common/vp9_reconintra.c b/libvpx/vp9/common/vp9_reconintra.c
index 3d84a2883..445785835 100644
--- a/libvpx/vp9/common/vp9_reconintra.c
+++ b/libvpx/vp9/common/vp9_reconintra.c
@@ -133,12 +133,16 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
   int frame_width, frame_height;
   int x0, y0;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int need_left = extend_modes[mode] & NEED_LEFT;
+  const int need_above = extend_modes[mode] & NEED_ABOVE;
+  const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT;
   int base = 128 << (bd - 8);
   // 127 127 127 .. 127 127 127 127 127 127
   // 129  A   B  ..  Y   Z
   // 129  C   D  ..  W   X
   // 129  E   F  ..  U   V
   // 129  G   H  ..  S   T   T   T   T   T
+  // For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1.
 
   // Get current frame pointer, width and height.
   if (plane == 0) {
@@ -153,79 +157,106 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
   x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
   y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
 
-  // left
-  if (left_available) {
-    if (xd->mb_to_bottom_edge < 0) {
-      /* slower path if the block needs border extension */
-      if (y0 + bs <= frame_height) {
-        for (i = 0; i < bs; ++i)
-          left_col[i] = ref[i * ref_stride - 1];
+  // NEED_LEFT
+  if (need_left) {
+    if (left_available) {
+      if (xd->mb_to_bottom_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (y0 + bs <= frame_height) {
+          for (i = 0; i < bs; ++i)
+            left_col[i] = ref[i * ref_stride - 1];
+        } else {
+          const int extend_bottom = frame_height - y0;
+          for (i = 0; i < extend_bottom; ++i)
+            left_col[i] = ref[i * ref_stride - 1];
+          for (; i < bs; ++i)
+            left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
+        }
       } else {
-        const int extend_bottom = frame_height - y0;
-        for (i = 0; i < extend_bottom; ++i)
+        /* faster path if the block does not need extension */
+        for (i = 0; i < bs; ++i)
           left_col[i] = ref[i * ref_stride - 1];
-        for (; i < bs; ++i)
-          left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
       }
     } else {
-      /* faster path if the block does not need extension */
-      for (i = 0; i < bs; ++i)
-        left_col[i] = ref[i * ref_stride - 1];
+      vpx_memset16(left_col, base + 1, bs);
     }
-  } else {
-    // TODO(Peter): this value should probably change for high bitdepth
-    vpx_memset16(left_col, base + 1, bs);
   }
 
-  // TODO(hkuang) do not extend 2*bs pixels for all modes.
-  // above
-  if (up_available) {
-    const uint16_t *above_ref = ref - ref_stride;
-    if (xd->mb_to_right_edge < 0) {
-      /* slower path if the block needs border extension */
-      if (x0 + 2 * bs <= frame_width) {
-        if (right_available && bs == 4) {
-          memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
+  // NEED_ABOVE
+  if (need_above) {
+    if (up_available) {
+      const uint16_t *above_ref = ref - ref_stride;
+      if (xd->mb_to_right_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (x0 + bs <= frame_width) {
+          memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+        } else if (x0 <= frame_width) {
+          const int r = frame_width - x0;
+          memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+          vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width);
+        }
+      } else {
+        /* faster path if the block does not need extension */
+        if (bs == 4 && right_available && left_available) {
+          const_above_row = above_ref;
         } else {
           memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
         }
-      } else if (x0 + bs <= frame_width) {
-        const int r = frame_width - x0;
-        if (right_available && bs == 4) {
+      }
+      above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+    } else {
+      vpx_memset16(above_row, base - 1, bs);
+      above_row[-1] = base - 1;
+    }
+  }
+
+  // NEED_ABOVERIGHT
+  if (need_aboveright) {
+    if (up_available) {
+      const uint16_t *above_ref = ref - ref_stride;
+      if (xd->mb_to_right_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (x0 + 2 * bs <= frame_width) {
+          if (right_available && bs == 4) {
+            memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
+          } else {
+            memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          }
+        } else if (x0 + bs <= frame_width) {
+          const int r = frame_width - x0;
+          if (right_available && bs == 4) {
+            memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+            vpx_memset16(above_row + r, above_row[r - 1],
+                         x0 + 2 * bs - frame_width);
+          } else {
+            memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          }
+        } else if (x0 <= frame_width) {
+          const int r = frame_width - x0;
           memcpy(above_row, above_ref, r * sizeof(above_row[0]));
           vpx_memset16(above_row + r, above_row[r - 1],
                        x0 + 2 * bs - frame_width);
+        }
+        above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+      } else {
+        /* faster path if the block does not need extension */
+        if (bs == 4 && right_available && left_available) {
+          const_above_row = above_ref;
         } else {
           memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          if (bs == 4 && right_available)
+            memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
+          else
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          above_row[-1] = left_available ? above_ref[-1] : (base + 1);
         }
-      } else if (x0 <= frame_width) {
-        const int r = frame_width - x0;
-        memcpy(above_row, above_ref, r * sizeof(above_row[0]));
-        vpx_memset16(above_row + r, above_row[r - 1],
-                       x0 + 2 * bs - frame_width);
       }
-      // TODO(Peter) this value should probably change for high bitdepth
-      above_row[-1] = left_available ? above_ref[-1] : (base+1);
     } else {
-      /* faster path if the block does not need extension */
-      if (bs == 4 && right_available && left_available) {
-        const_above_row = above_ref;
-      } else {
-        memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-        if (bs == 4 && right_available)
-          memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
-        else
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
-        // TODO(Peter): this value should probably change for high bitdepth
-        above_row[-1] = left_available ? above_ref[-1] : (base+1);
-      }
+      vpx_memset16(above_row, base - 1, bs * 2);
+      above_row[-1] = base - 1;
     }
-  } else {
-    vpx_memset16(above_row, base - 1, bs * 2);
-    // TODO(Peter): this value should probably change for high bitdepth
-    above_row[-1] = base - 1;
   }
 
   // predict
@@ -391,8 +422,8 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in,
                              int aoff, int loff, int plane) {
   const int bw = (1 << bwl_in);
   const int txw = (1 << tx_size);
-  const int have_top = loff || xd->up_available;
-  const int have_left = aoff || xd->left_available;
+  const int have_top = loff || (xd->above_mi != NULL);
+  const int have_left = aoff || (xd->left_mi != NULL);
   const int have_right = (aoff + txw) < bw;
   const int x = aoff * 4;
   const int y = loff * 4;
diff --git a/libvpx/vp9/common/vp9_rtcd_defs.pl b/libvpx/vp9/common/vp9_rtcd_defs.pl
index 5bf71ef9f..846133674 100644
--- a/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -70,10 +70,6 @@ add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8
 specialize qw/vp9_post_proc_down_and_across sse2/;
 $vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
 
-add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
-specialize qw/vp9_plane_add_noise sse2/;
-$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
-
 add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
 specialize qw/vp9_filter_by_weight16x16 sse2 msa/;
 
@@ -169,9 +165,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 
     add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
     specialize qw/vp9_highbd_post_proc_down_and_across/;
-
-    add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
-    specialize qw/vp9_highbd_plane_add_noise/;
   }
 
   #
@@ -194,42 +187,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 #
 if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
 
-add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
-specialize qw/vp9_avg_8x8 sse2 neon msa/;
-
-add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
-specialize qw/vp9_avg_4x4 sse2 msa/;
-
-add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
-specialize qw/vp9_minmax_8x8 sse2/;
-
-add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
-
-add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp9_hadamard_16x16 sse2/;
-
-add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length";
-specialize qw/vp9_satd sse2/;
-
-add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
-specialize qw/vp9_int_pro_row sse2 neon/;
-
-add_proto qw/int16_t vp9_int_pro_col/, "uint8_t const *ref, const int width";
-specialize qw/vp9_int_pro_col sse2 neon/;
-
-add_proto qw/int vp9_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
-specialize qw/vp9_vector_var neon sse2/;
-
-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
-  add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p";
-  specialize qw/vp9_highbd_avg_8x8/;
-  add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
-  specialize qw/vp9_highbd_avg_4x4/;
-  add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
-  specialize qw/vp9_highbd_minmax_8x8/;
-}
-
 # ENCODEMB INVOKE
 
 #
@@ -288,7 +245,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vp9_fht16x16 sse2/;
 
   add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
+  specialize qw/vp9_fwht4x4/, "$sse2_x86inc";
 } else {
   add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp9_fht4x4 sse2 msa/;
@@ -300,7 +257,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vp9_fht16x16 sse2 msa/;
 
   add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/vp9_fwht4x4 msa/, "$mmx_x86inc";
+  specialize qw/vp9_fwht4x4 msa/, "$sse2_x86inc";
 }
 
 #
@@ -312,10 +269,7 @@ $vp9_full_search_sad_sse3=vp9_full_search_sadx3;
 $vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
 
 add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
-specialize qw/vp9_diamond_search_sad/;
-
-add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
-specialize qw/vp9_full_range_search/;
+specialize qw/vp9_diamond_search_sad avx/;
 
 add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
 specialize qw/vp9_temporal_filter_apply sse2 msa/;
@@ -349,6 +303,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 }
 # End vp9_high encoder functions
 
+#
+# frame based scale
+#
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+} else {
+  add_proto qw/void vp9_scale_and_extend_frame/, "const struct yv12_buffer_config *src, struct yv12_buffer_config *dst";
+  specialize qw/vp9_scale_and_extend_frame ssse3/;
+}
+
 }
 # end encoder functions
 1;
diff --git a/libvpx/vp9/common/vp9_scan.c b/libvpx/vp9/common/vp9_scan.c
index d6fb8b2d7..8b8b09f4a 100644
--- a/libvpx/vp9/common/vp9_scan.c
+++ b/libvpx/vp9/common/vp9_scan.c
@@ -229,10 +229,8 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = {
   990, 959, 1022, 991, 1023,
 };
 
-// Neighborhood 5-tuples for various scans and blocksizes,
-// in {top, left, topleft, topright, bottomleft} order
-// for each position in raster scan order.
-// -1 indicates the neighbor does not exist.
+// Neighborhood 2-tuples for various scans and blocksizes,
+// in {top, left} order for each position in corresponding scan order.
 DECLARE_ALIGNED(16, static const int16_t,
                 default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
   0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9,
diff --git a/libvpx/vp9/common/vp9_scan.h b/libvpx/vp9/common/vp9_scan.h
index 1d86b5cfe..4c1ee8107 100644
--- a/libvpx/vp9/common/vp9_scan.h
+++ b/libvpx/vp9/common/vp9_scan.h
@@ -42,7 +42,7 @@ static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
                                          PLANE_TYPE type, int block_idx) {
   const MODE_INFO *const mi = xd->mi[0];
 
-  if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) {
+  if (is_inter_block(mi) || type != PLANE_TYPE_Y || xd->lossless) {
     return &vp9_default_scan_orders[tx_size];
   } else {
     const PREDICTION_MODE mode = get_y_mode(mi, block_idx);
diff --git a/libvpx/vp9/common/vp9_seg_common.c b/libvpx/vp9/common/vp9_seg_common.c
index c8ef618b7..7af61629a 100644
--- a/libvpx/vp9/common/vp9_seg_common.c
+++ b/libvpx/vp9/common/vp9_seg_common.c
@@ -28,6 +28,7 @@ static const int seg_feature_data_max[SEG_LVL_MAX] = {
 void vp9_clearall_segfeatures(struct segmentation *seg) {
   vp9_zero(seg->feature_data);
   vp9_zero(seg->feature_mask);
+  seg->aq_av_offset = 0;
 }
 
 void vp9_enable_segfeature(struct segmentation *seg, int segment_id,
diff --git a/libvpx/vp9/common/vp9_seg_common.h b/libvpx/vp9/common/vp9_seg_common.h
index 5b75d8d4e..99a9440c1 100644
--- a/libvpx/vp9/common/vp9_seg_common.h
+++ b/libvpx/vp9/common/vp9_seg_common.h
@@ -46,7 +46,8 @@ struct segmentation {
   vpx_prob pred_probs[PREDICTION_PROBS];
 
   int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
-  unsigned int feature_mask[MAX_SEGMENTS];
+  uint32_t feature_mask[MAX_SEGMENTS];
+  int aq_av_offset;
 };
 
 static INLINE int segfeature_active(const struct segmentation *seg,
diff --git a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
index 8d312d03f..1c77b57ff 100644
--- a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include "./vp9_rtcd.h"
 #include "vpx_dsp/x86/inv_txfm_sse2.h"
 #include "vpx_dsp/x86/txfm_common_sse2.h"
 #include "vpx_ports/mem.h"
diff --git a/libvpx/vp9/common/x86/vp9_postproc_sse2.asm b/libvpx/vp9/common/x86/vp9_postproc_sse2.asm
index ec8bfdb18..430762815 100644
--- a/libvpx/vp9/common/x86/vp9_postproc_sse2.asm
+++ b/libvpx/vp9/common/x86/vp9_postproc_sse2.asm
@@ -624,68 +624,6 @@ sym(vp9_mbpost_proc_across_ip_xmm):
 %undef flimit4
 
 
-;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
-;                            unsigned char blackclamp[16],
-;                            unsigned char whiteclamp[16],
-;                            unsigned char bothclamp[16],
-;                            unsigned int width, unsigned int height, int pitch)
-global sym(vp9_plane_add_noise_wmt) PRIVATE
-sym(vp9_plane_add_noise_wmt):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 8
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-.addnoise_loop:
-    call sym(LIBVPX_RAND) WRT_PLT
-    mov     rcx, arg(1) ;noise
-    and     rax, 0xff
-    add     rcx, rax
-
-    ; we rely on the fact that the clamping vectors are stored contiguously
-    ; in black/white/both order. Note that we have to reload this here because
-    ; rdx could be trashed by rand()
-    mov     rdx, arg(2) ; blackclamp
-
-
-            mov     rdi, rcx
-            movsxd  rcx, dword arg(5) ;[Width]
-            mov     rsi, arg(0) ;Pos
-            xor         rax,rax
-
-.addnoise_nextset:
-            movdqu      xmm1,[rsi+rax]         ; get the source
-
-            psubusb     xmm1, [rdx]    ;blackclamp        ; clamp both sides so we don't outrange adding noise
-            paddusb     xmm1, [rdx+32] ;bothclamp
-            psubusb     xmm1, [rdx+16] ;whiteclamp
-
-            movdqu      xmm2,[rdi+rax]         ; get the noise for this line
-            paddb       xmm1,xmm2              ; add it in
-            movdqu      [rsi+rax],xmm1         ; store the result
-
-            add         rax,16                 ; move to the next line
-
-            cmp         rax, rcx
-            jl          .addnoise_nextset
-
-    movsxd  rax, dword arg(7) ; Pitch
-    add     arg(0), rax ; Start += Pitch
-    sub     dword arg(6), 1   ; Height -= 1
-    jg      .addnoise_loop
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
 SECTION_RODATA
 align 16
 rd42:
author	Johann <johannkoenig@google.com>	2016-07-21 12:09:52 -0700
committer	Johann <johannkoenig@google.com>	2016-07-21 12:09:52 -0700
commit	68e1c830ade592be74773e249bf94e2bbfb50de7 (patch)
tree	08299f7deb6079690f0a3d2118ef3882fa77bdc6 /libvpx/vp9/common
parent	96ebd06cb9832f583f7c181ec886eade209524df (diff)
download	libvpx-68e1c830ade592be74773e249bf94e2bbfb50de7.tar.gz